mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable indentation, bullets, emphasis, quotes etc?
   7  *
   8  *  - configurable choice of how to allocate node names?
   9  *     + possibly a template-like approach, choosing node names to
  10  *       be the full section title or perhaps the internal keyword?
  11  *     + neither of those seems quite right. Perhaps instead a
  12  *       Windows Help-like mechanism, where a magic config
  13  *       directive allows user choice of name for every node.
  14  *     + Only trouble with that is, now what happens to the section
  15  *       numbers? Do they become completely vestigial and just sit
  16  *       in the title text of each node? Or do we keep them in the
  17  *       menus somehow? I think people might occasionally want to
  18  *       go to a section by number, if only because all the _other_
  19  *       formats of the same document will reference the numbers
  20  *       all the time. So our menu lines could look like one of
  21  *       these:
  22  *        * Nodename: Section 1.2. Title of section.
  23  *        * Section 1.2: Nodename. Title of section.
  24  *
  25  *  - might be helpful to diagnose duplicate node names!
  26  */
  27
  28 /*
  29  * FIXME:
  30  *
  31  *  - alignment in the index is broken when a non-representable
  32  *    character appears with no alternative. More generally, I
  33  *    fear, this is the fault of the info_rdadd* functions failing
  34  *    to return correct width figures in this circumstance (so it
  35  *    will affect list paragraph prefixes and paragraph wrapping as
  36  *    well).
  37  */
  38
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <assert.h>
  42 #include "halibut.h"
  43
  44 typedef struct {
  45     char *filename;
  46     int maxfilesize;
  47     int charset;
  48 } infoconfig;
  49
  50 typedef struct {
  51     rdstringc output;
  52     int charset;
  53     charset_state state;
  54     int wcmode;
  55 } info_data;
  56 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
  57 static const info_data empty_info_data = EMPTY_INFO_DATA;
  58
  59 typedef struct node_tag node;
  60 struct node_tag {
  61     node *listnext;
  62     node *up, *prev, *next, *lastchild;
  63     int pos, started_menu, filenum;
  64     char *name;
  65     info_data text;
  66 };
  67
  68 typedef struct {
  69     char *text;
  70     int length;
  71     int nnodes, nodesize;
  72     node **nodes;
  73 } info_idx;
  74
  75 static int info_rdadd(info_data *, wchar_t);
  76 static int info_rdadds(info_data *, wchar_t const *);
  77 static int info_rdaddc(info_data *, char);
  78 static int info_rdaddsc(info_data *, char const *);
  79
  80 static void info_heading(info_data *, word *, word *, int);
  81 static void info_rule(info_data *, int, int);
  82 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  83                       int, int, int);
  84 static void info_codepara(info_data *, word *, int, int);
  85 static void info_versionid(info_data *, word *);
  86 static void info_menu_item(info_data *, node *, paragraph *);
  87 static word *info_transform_wordlist(word *, keywordlist *);
  88 static int info_check_index(word *, node *, indexdata *);
  89
  90 static int info_rdaddwc(info_data *, word *, word *, int);
  91
  92 static node *info_node_new(char *name, int charset);
  93 static char *info_node_name(paragraph *p, int charset);
  94
  95 static infoconfig info_configure(paragraph *source) {
  96     infoconfig ret;
  97
  98     /*
  99      * Defaults.
 100      */
 101     ret.filename = dupstr("output.info");
 102     ret.maxfilesize = 64 << 10;
 103     ret.charset = CS_ASCII;
 104
 105     for (; source; source = source->next) {
 106         if (source->type == para_Config) {
 107             if (!ustricmp(source->keyword, L"info-filename")) {
 108                 sfree(ret.filename);
 109                 ret.filename = dupstr(adv(source->origkeyword));
 110             } else if (!ustricmp(source->keyword, L"info-charset")) {
 111                 char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
 112                 ret.charset = charset_from_localenc(csname);
 113                 sfree(csname);
 114             } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
 115                 ret.maxfilesize = utoi(uadv(source->keyword));
 116             }
 117         }
 118     }
 119
 120     return ret;
 121 }
 122
 123 paragraph *info_config_filename(char *filename)
 124 {
 125     return cmdline_cfg_simple("info-filename", filename, NULL);
 126 }
 127
 128 void info_backend(paragraph *sourceform, keywordlist *keywords,
 129                   indexdata *idx, void *unused) {
 130     paragraph *p;
 131     infoconfig conf;
 132     word *prefix, *body, *wp;
 133     word spaceword;
 134     wchar_t *prefixextra;
 135     int nesting, nestindent;
 136     int indentb, indenta;
 137     int filepos;
 138     int has_index;
 139     info_data intro_text = EMPTY_INFO_DATA;
 140     node *topnode, *currnode;
 141     word bullet;
 142     FILE *fp;
 143
 144     /*
 145      * FIXME: possibly configurability?
 146      */
 147     int width = 70, listindentbefore = 1, listindentafter = 3;
 148     int indent_code = 2, index_width = 40;
 149
 150     IGNORE(unused);
 151
 152     conf = info_configure(sourceform);
 153
 154     /*
 155      * Go through and create a node for each section.
 156      */
 157     topnode = info_node_new("Top", conf.charset);
 158     currnode = topnode;
 159     for (p = sourceform; p; p = p->next) switch (p->type) {
 160         /*
 161          * Chapter titles.
 162          */
 163       case para_Chapter:
 164       case para_Appendix:
 165       case para_UnnumberedChapter:
 166       case para_Heading:
 167       case para_Subsect:
 168         {
 169             node *newnode, *upnode;
 170             char *nodename;
 171
 172             nodename = info_node_name(p, conf.charset);
 173             newnode = info_node_new(nodename, conf.charset);
 174             sfree(nodename);
 175
 176             p->private_data = newnode;
 177
 178             if (p->parent)
 179                 upnode = (node *)p->parent->private_data;
 180             else
 181                 upnode = topnode;
 182             assert(upnode);
 183             newnode->up = upnode;
 184
 185             currnode->next = newnode;
 186             newnode->prev = currnode;
 187
 188             currnode->listnext = newnode;
 189             currnode = newnode;
 190         }
 191         break;
 192     }
 193
 194     /*
 195      * Set up the display form of each index entry.
 196      */
 197     {
 198         int i;
 199         indexentry *entry;
 200
 201         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 202             info_idx *ii = mknew(info_idx);
 203             info_data id = EMPTY_INFO_DATA;
 204
 205             id.charset = conf.charset;
 206
 207             ii->nnodes = ii->nodesize = 0;
 208             ii->nodes = NULL;
 209
 210             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE);
 211
 212             ii->text = id.output.text;
 213
 214             entry->backend_data = ii;
 215         }
 216     }
 217
 218     /*
 219      * An Info file begins with a piece of introductory text which
 220      * is apparently never shown anywhere. This seems to me to be a
 221      * good place to put the copyright notice and the version IDs.
 222      * Also, Info directory entries are expected to go here.
 223      */
 224     intro_text.charset = conf.charset;
 225
 226     info_rdaddsc(&intro_text,
 227             "This Info file generated by Halibut, ");
 228     info_rdaddsc(&intro_text, version);
 229     info_rdaddsc(&intro_text, "\n\n");
 230
 231     for (p = sourceform; p; p = p->next)
 232         if (p->type == para_Config &&
 233             !ustricmp(p->keyword, L"info-dir-entry")) {
 234             wchar_t *section, *shortname, *longname, *kw;
 235             char *s;
 236
 237             section = uadv(p->keyword);
 238             shortname = *section ? uadv(section) : NULL;
 239             longname = *shortname ? uadv(shortname) : NULL;
 240             kw = *longname ? uadv(longname) : NULL;
 241
 242             if (!*longname) {
 243                 error(err_infodirentry, &p->fpos);
 244                 continue;
 245             }
 246
 247             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 248             info_rdadds(&intro_text, section);
 249             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 250             info_rdadds(&intro_text, shortname);
 251             info_rdaddsc(&intro_text, ": (");
 252             s = dupstr(conf.filename);
 253             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 254                 s[strlen(s)-5] = '\0';
 255             info_rdaddsc(&intro_text, s);
 256             sfree(s);
 257             info_rdaddsc(&intro_text, ")");
 258             if (*kw) {
 259                 keyword *kwl = kw_lookup(keywords, kw);
 260                 if (kwl && kwl->para->private_data) {
 261                     node *n = (node *)kwl->para->private_data;
 262                     info_rdaddsc(&intro_text, n->name);
 263                 }
 264             }
 265             info_rdaddsc(&intro_text, ".   ");
 266             info_rdadds(&intro_text, longname);
 267             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 268         }
 269
 270     for (p = sourceform; p; p = p->next)
 271         if (p->type == para_Copyright)
 272             info_para(&intro_text, NULL, NULL, p->words, keywords,
 273                       0, 0, width);
 274
 275     for (p = sourceform; p; p = p->next)
 276         if (p->type == para_VersionID)
 277             info_versionid(&intro_text, p->words);
 278
 279     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 280         info_rdaddc(&intro_text, '\n');
 281
 282     /* Do the title */
 283     for (p = sourceform; p; p = p->next)
 284         if (p->type == para_Title)
 285             info_heading(&topnode->text, NULL, p->words, width);
 286
 287     nestindent = listindentbefore + listindentafter;
 288     nesting = 0;
 289
 290     currnode = topnode;
 291
 292     /* Do the main document */
 293     for (p = sourceform; p; p = p->next) switch (p->type) {
 294
 295       case para_QuotePush:
 296         nesting += 2;
 297         break;
 298       case para_QuotePop:
 299         nesting -= 2;
 300         assert(nesting >= 0);
 301         break;
 302
 303       case para_LcontPush:
 304         nesting += nestindent;
 305         break;
 306       case para_LcontPop:
 307         nesting -= nestindent;
 308         assert(nesting >= 0);
 309         break;
 310
 311         /*
 312          * Things we ignore because we've already processed them or
 313          * aren't going to touch them in this pass.
 314          */
 315       case para_IM:
 316       case para_BR:
 317       case para_Biblio:                /* only touch BiblioCited */
 318       case para_VersionID:
 319       case para_NoCite:
 320       case para_Title:
 321         break;
 322
 323         /*
 324          * Chapter titles.
 325          */
 326       case para_Chapter:
 327       case para_Appendix:
 328       case para_UnnumberedChapter:
 329       case para_Heading:
 330       case para_Subsect:
 331         currnode = p->private_data;
 332         assert(currnode);
 333         assert(currnode->up);
 334
 335         if (!currnode->up->started_menu) {
 336             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 337             currnode->up->started_menu = TRUE;
 338         }
 339         info_menu_item(&currnode->up->text, currnode, p);
 340
 341         has_index |= info_check_index(p->words, currnode, idx);
 342         info_heading(&currnode->text, p->kwtext, p->words, width);
 343         nesting = 0;
 344         break;
 345
 346       case para_Rule:
 347         info_rule(&currnode->text, nesting, width - nesting);
 348         break;
 349
 350       case para_Normal:
 351       case para_Copyright:
 352       case para_DescribedThing:
 353       case para_Description:
 354       case para_BiblioCited:
 355       case para_Bullet:
 356       case para_NumberedList:
 357         has_index |= info_check_index(p->words, currnode, idx);
 358         if (p->type == para_Bullet) {
 359             bullet.next = NULL;
 360             bullet.alt = NULL;
 361             bullet.type = word_Normal;
 362             bullet.text = L"-";        /* FIXME: configurability */
 363             prefix = &bullet;
 364             prefixextra = NULL;
 365             indentb = listindentbefore;
 366             indenta = listindentafter;
 367         } else if (p->type == para_NumberedList) {
 368             prefix = p->kwtext;
 369             prefixextra = L".";        /* FIXME: configurability */
 370             indentb = listindentbefore;
 371             indenta = listindentafter;
 372         } else if (p->type == para_Description) {
 373             prefix = NULL;
 374             prefixextra = NULL;
 375             indentb = listindentbefore;
 376             indenta = listindentafter;
 377         } else {
 378             prefix = NULL;
 379             prefixextra = NULL;
 380             indentb = indenta = 0;
 381         }
 382         if (p->type == para_BiblioCited) {
 383             body = dup_word_list(p->kwtext);
 384             for (wp = body; wp->next; wp = wp->next);
 385             wp->next = &spaceword;
 386             spaceword.next = p->words;
 387             spaceword.alt = NULL;
 388             spaceword.type = word_WhiteSpace;
 389             spaceword.text = NULL;
 390         } else {
 391             wp = NULL;
 392             body = p->words;
 393         }
 394         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 395                   nesting + indentb, indenta,
 396                   width - nesting - indentb - indenta);
 397         if (wp) {
 398             wp->next = NULL;
 399             free_word_list(body);
 400         }
 401         break;
 402
 403       case para_Code:
 404         info_codepara(&currnode->text, p->words,
 405                       nesting + indent_code,
 406                       width - nesting - 2 * indent_code);
 407         break;
 408     }
 409
 410     /*
 411      * Create an index node if required.
 412      */
 413     if (has_index) {
 414         node *newnode;
 415         int i, j, k;
 416         indexentry *entry;
 417
 418         newnode = info_node_new("Index", conf.charset);
 419         newnode->up = topnode;
 420
 421         currnode->next = newnode;
 422         newnode->prev = currnode;
 423         currnode->listnext = newnode;
 424
 425         info_rdaddsc(&newnode->text, "Index\n-----\n\n");
 426
 427         info_menu_item(&topnode->text, newnode, NULL);
 428
 429         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 430             info_idx *ii = (info_idx *)entry->backend_data;
 431
 432             for (j = 0; j < ii->nnodes; j++) {
 433                 /*
 434                  * When we have multiple references for a single
 435                  * index term, we only display the actual term on
 436                  * the first line, to make it clear that the terms
 437                  * really are the same.
 438                  */
 439                 if (j == 0)
 440                     info_rdaddsc(&newnode->text, ii->text);
 441                 for (k = (j ? 0 : ii->length); k < index_width; k++)
 442                     info_rdaddc(&newnode->text, ' ');
 443                 info_rdaddsc(&newnode->text, "   *Note ");
 444                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 445                 info_rdaddsc(&newnode->text, "::\n");
 446             }
 447         }
 448     }
 449
 450     /*
 451      * Finalise the text of each node, by adding the ^_ delimiter
 452      * and the node line at the top.
 453      */
 454     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 455         char *origtext = currnode->text.output.text;
 456         currnode->text = empty_info_data;
 457         currnode->text.charset = conf.charset;
 458         info_rdaddsc(&currnode->text, "\037\nFile: ");
 459         info_rdaddsc(&currnode->text, conf.filename);
 460         info_rdaddsc(&currnode->text, ",  Node: ");
 461         info_rdaddsc(&currnode->text, currnode->name);
 462         if (currnode->prev) {
 463             info_rdaddsc(&currnode->text, ",  Prev: ");
 464             info_rdaddsc(&currnode->text, currnode->prev->name);
 465         }
 466         info_rdaddsc(&currnode->text, ",  Up: ");
 467         info_rdaddsc(&currnode->text, (currnode->up ?
 468                                        currnode->up->name : "(dir)"));
 469         if (currnode->next) {
 470             info_rdaddsc(&currnode->text, ",  Next: ");
 471             info_rdaddsc(&currnode->text, currnode->next->name);
 472         }
 473         info_rdaddsc(&currnode->text, "\n\n");
 474         info_rdaddsc(&currnode->text, origtext);
 475         /*
 476          * Just make _absolutely_ sure we end with a newline.
 477          */
 478         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 479             info_rdaddc(&currnode->text, '\n');
 480
 481         sfree(origtext);
 482     }
 483
 484     /*
 485      * Compute the offsets for the tag table.
 486      */
 487     filepos = intro_text.output.pos;
 488     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 489         currnode->pos = filepos;
 490         filepos += currnode->text.output.pos;
 491     }
 492
 493     /*
 494      * Split into sub-files.
 495      */
 496     if (conf.maxfilesize > 0) {
 497         int currfilesize = intro_text.output.pos, currfilenum = 1;
 498         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 499             if (currfilesize > intro_text.output.pos &&
 500                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 501                 currfilenum++;
 502                 currfilesize = intro_text.output.pos;
 503             }
 504             currnode->filenum = currfilenum;
 505             currfilesize += currnode->text.output.pos;
 506         }
 507     }
 508
 509     /*
 510      * Write the primary output file.
 511      */
 512     fp = fopen(conf.filename, "w");
 513     if (!fp) {
 514         error(err_cantopenw, conf.filename);
 515         return;
 516     }
 517     fputs(intro_text.output.text, fp);
 518     if (conf.maxfilesize == 0) {
 519         for (currnode = topnode; currnode; currnode = currnode->listnext)
 520             fputs(currnode->text.output.text, fp);
 521     } else {
 522         int filenum = 0;
 523         fprintf(fp, "\037\nIndirect:\n");
 524         for (currnode = topnode; currnode; currnode = currnode->listnext)
 525             if (filenum != currnode->filenum) {
 526                 filenum = currnode->filenum;
 527                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 528                         currnode->pos);
 529             }
 530     }
 531     fprintf(fp, "\037\nTag Table:\n");
 532     if (conf.maxfilesize > 0)
 533         fprintf(fp, "(Indirect)\n");
 534     for (currnode = topnode; currnode; currnode = currnode->listnext)
 535         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 536     fprintf(fp, "\037\nEnd Tag Table\n");
 537     fclose(fp);
 538
 539     /*
 540      * Write the subfiles.
 541      */
 542     if (conf.maxfilesize > 0) {
 543         int filenum = 0;
 544         fp = NULL;
 545
 546         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 547             if (filenum != currnode->filenum) {
 548                 char *fname;
 549
 550                 filenum = currnode->filenum;
 551
 552                 if (fp)
 553                     fclose(fp);
 554                 fname = mknewa(char, strlen(conf.filename) + 40);
 555                 sprintf(fname, "%s-%d", conf.filename, filenum);
 556                 fp = fopen(fname, "w");
 557                 if (!fp) {
 558                     error(err_cantopenw, fname);
 559                     return;
 560                 }
 561                 sfree(fname);
 562                 fputs(intro_text.output.text, fp);
 563             }
 564             fputs(currnode->text.output.text, fp);
 565         }
 566
 567         if (fp)
 568             fclose(fp);
 569     }
 570 }
 571
 572 static int info_check_index(word *w, node *n, indexdata *idx)
 573 {
 574     int ret = 0;
 575
 576     for (; w; w = w->next) {
 577         if (w->type == word_IndexRef) {
 578             indextag *tag;
 579             int i;
 580
 581             tag = index_findtag(idx, w->text);
 582             if (!tag)
 583                 break;
 584
 585             for (i = 0; i < tag->nrefs; i++) {
 586                 indexentry *entry = tag->refs[i];
 587                 info_idx *ii = (info_idx *)entry->backend_data;
 588
 589                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 590                     /*
 591                      * If the same index term is indexed twice
 592                      * within the same section, we only want to
 593                      * mention it once in the index. So do nothing
 594                      * here.
 595                      */
 596                     continue;
 597                 }
 598
 599                 if (ii->nnodes >= ii->nodesize) {
 600                     ii->nodesize += 32;
 601                     ii->nodes = resize(ii->nodes, ii->nodesize);
 602                 }
 603
 604                 ii->nodes[ii->nnodes++] = n;
 605
 606                 ret = 1;
 607             }
 608         }
 609     }
 610
 611     return ret;
 612 }
 613
 614 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 615 {
 616     word *ret = dup_word_list(words);
 617     word *w;
 618     keyword *kwl;
 619
 620     for (w = ret; w; w = w->next) {
 621         w->private_data = NULL;
 622         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 623             kwl = kw_lookup(keywords, w->text);
 624             if (kwl) {
 625                 if (kwl->para->type == para_NumberedList ||
 626                     kwl->para->type == para_BiblioCited) {
 627                     /*
 628                      * In Info, we do nothing special for xrefs to
 629                      * numbered list items or bibliography entries.
 630                      */
 631                     break;
 632                 } else {
 633                     /*
 634                      * An xref to a different section has its text
 635                      * completely replaced.
 636                      */
 637                     word *w2, *w3, *w4;
 638                     w2 = w3 = w->next;
 639                     w4 = NULL;
 640                     while (w2) {
 641                         if (w2->type == word_XrefEnd) {
 642                             w4 = w2->next;
 643                             w2->next = NULL;
 644                             break;
 645                         }
 646                         w2 = w2->next;
 647                     }
 648                     free_word_list(w3);
 649
 650                     /*
 651                      * Now w is the UpperXref / LowerXref we
 652                      * started with, and w4 is the next word after
 653                      * the corresponding XrefEnd (if any). The
 654                      * simplest thing is just to stick a pointer to
 655                      * the target node structure in the private
 656                      * data field of the xref word, and let
 657                      * info_rdaddwc and friends read the node name
 658                      * out from there.
 659                      */
 660                     w->next = w4;
 661                     w->private_data = kwl->para->private_data;
 662                     assert(w->private_data);
 663                 }
 664             }
 665         }
 666     }
 667
 668     return ret;
 669 }
 670
 671 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs) {
 672     int ret = 0;
 673
 674     for (; words && words != end; words = words->next) switch (words->type) {
 675       case word_HyperLink:
 676       case word_HyperEnd:
 677       case word_XrefEnd:
 678       case word_IndexRef:
 679         break;
 680
 681       case word_Normal:
 682       case word_Emph:
 683       case word_Code:
 684       case word_WeakCode:
 685       case word_WhiteSpace:
 686       case word_EmphSpace:
 687       case word_CodeSpace:
 688       case word_WkCodeSpace:
 689       case word_Quote:
 690       case word_EmphQuote:
 691       case word_CodeQuote:
 692       case word_WkCodeQuote:
 693         assert(words->type != word_CodeQuote &&
 694                words->type != word_WkCodeQuote);
 695         if (towordstyle(words->type) == word_Emph &&
 696             (attraux(words->aux) == attr_First ||
 697              attraux(words->aux) == attr_Only))
 698             ret += info_rdadd(id, L'_');      /* FIXME: configurability */
 699         else if (towordstyle(words->type) == word_Code &&
 700                  (attraux(words->aux) == attr_First ||
 701                   attraux(words->aux) == attr_Only))
 702             ret += info_rdadd(id, L'`');      /* FIXME: configurability */
 703         if (removeattr(words->type) == word_Normal) {
 704             if (cvt_ok(id->charset, words->text) || !words->alt)
 705                 ret += info_rdadds(id, words->text);
 706             else
 707                 ret += info_rdaddwc(id, words->alt, NULL, FALSE);
 708         } else if (removeattr(words->type) == word_WhiteSpace) {
 709             ret += info_rdadd(id, L' ');
 710         } else if (removeattr(words->type) == word_Quote) {
 711             ret += info_rdadd(id, quoteaux(words->aux) == quote_Open ? L'`' : L'\'');
 712                                        /* FIXME: configurability */
 713         }
 714         if (towordstyle(words->type) == word_Emph &&
 715             (attraux(words->aux) == attr_Last ||
 716              attraux(words->aux) == attr_Only))
 717             ret += info_rdadd(id, L'_');     /* FIXME: configurability */
 718         else if (towordstyle(words->type) == word_Code &&
 719                  (attraux(words->aux) == attr_Last ||
 720                   attraux(words->aux) == attr_Only))
 721             ret += info_rdadd(id, L'\'');     /* FIXME: configurability */
 722         break;
 723
 724       case word_UpperXref:
 725       case word_LowerXref:
 726         if (xrefs && words->private_data) {
 727             /*
 728              * This bit is structural and so must be done in char
 729              * rather than wchar_t.
 730              */
 731             ret += info_rdaddsc(id, "*Note ");
 732             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 733             ret += info_rdaddsc(id, "::");
 734         }
 735         break;
 736     }
 737
 738     return ret;
 739 }
 740
 741 static int info_width_internal(word *words, int xrefs, int charset);
 742
 743 static int info_width_internal_list(word *words, int xrefs, int charset) {
 744     int w = 0;
 745     while (words) {
 746         w += info_width_internal(words, xrefs, charset);
 747         words = words->next;
 748     }
 749     return w;
 750 }
 751
 752 static int info_width_internal(word *words, int xrefs, int charset) {
 753     switch (words->type) {
 754       case word_HyperLink:
 755       case word_HyperEnd:
 756       case word_XrefEnd:
 757       case word_IndexRef:
 758         return 0;
 759
 760       case word_Normal:
 761       case word_Emph:
 762       case word_Code:
 763       case word_WeakCode:
 764         return (((words->type == word_Emph ||
 765                   words->type == word_Code)
 766                  ? (attraux(words->aux) == attr_Only ? 2 :
 767                     attraux(words->aux) == attr_Always ? 0 : 1)
 768                  : 0) +
 769                 (cvt_ok(charset, words->text) || !words->alt ?
 770                  ustrlen(words->text) :
 771                  info_width_internal_list(words->alt, xrefs, charset)));
 772
 773       case word_WhiteSpace:
 774       case word_EmphSpace:
 775       case word_CodeSpace:
 776       case word_WkCodeSpace:
 777       case word_Quote:
 778       case word_EmphQuote:
 779       case word_CodeQuote:
 780       case word_WkCodeQuote:
 781         assert(words->type != word_CodeQuote &&
 782                words->type != word_WkCodeQuote);
 783         return (((towordstyle(words->type) == word_Emph ||
 784                   towordstyle(words->type) == word_Code)
 785                  ? (attraux(words->aux) == attr_Only ? 2 :
 786                     attraux(words->aux) == attr_Always ? 0 : 1)
 787                  : 0) + 1);
 788
 789       case word_UpperXref:
 790       case word_LowerXref:
 791         if (xrefs && words->private_data) {
 792             /* "*Note " plus "::" comes to 8 characters */
 793             return 8 + strlen(((node *)words->private_data)->name);
 794         }
 795         break;
 796     }
 797     return 0;                          /* should never happen */
 798 }
 799
 800 static int info_width_noxrefs(void *ctx, word *words)
 801 {
 802     return info_width_internal(words, FALSE, *(int *)ctx);
 803 }
 804 static int info_width_xrefs(void *ctx, word *words)
 805 {
 806     return info_width_internal(words, TRUE, *(int *)ctx);
 807 }
 808
 809 static void info_heading(info_data *text, word *tprefix,
 810                          word *words, int width) {
 811     int length;
 812     int firstlinewidth, wrapwidth;
 813     wrappedline *wrapping, *p;
 814
 815     length = 0;
 816     if (tprefix) {
 817         length += info_rdaddwc(text, tprefix, NULL, FALSE);
 818         length += info_rdadds(text, L": ");/* FIXME: configurability */
 819     }
 820
 821     wrapwidth = width;
 822     firstlinewidth = width - length;
 823
 824     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
 825                          info_width_noxrefs, &text->charset, 0);
 826     for (p = wrapping; p; p = p->next) {
 827         length += info_rdaddwc(text, p->begin, p->end, FALSE);
 828         info_rdadd(text, L'\n');
 829         while (length--)
 830             info_rdadd(text, L'-');  /* FIXME: configurability */
 831         info_rdadd(text, L'\n');
 832         length = 0;
 833     }
 834     wrap_free(wrapping);
 835     info_rdadd(text, L'\n');
 836 }
 837
 838 static void info_rule(info_data *text, int indent, int width) {
 839     while (indent--) info_rdadd(text, L' ');
 840     while (width--) info_rdadd(text, L'-');
 841     info_rdadd(text, L'\n');
 842     info_rdadd(text, L'\n');
 843 }
 844
 845 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 846                       word *input, keywordlist *keywords,
 847                       int indent, int extraindent, int width) {
 848     wrappedline *wrapping, *p;
 849     word *words;
 850     int e;
 851     int i;
 852     int firstlinewidth = width;
 853
 854     words = info_transform_wordlist(input, keywords);
 855
 856     if (prefix) {
 857         for (i = 0; i < indent; i++)
 858             info_rdadd(text, L' ');
 859         e = info_rdaddwc(text, prefix, NULL, FALSE);
 860         if (prefixextra)
 861             e += info_rdadds(text, prefixextra);
 862         /* If the prefix is too long, shorten the first line to fit. */
 863         e = extraindent - e;
 864         if (e < 0) {
 865             firstlinewidth += e;       /* this decreases it, since e < 0 */
 866             if (firstlinewidth < 0) {
 867                 e = indent + extraindent;
 868                 firstlinewidth = width;
 869                 info_rdadd(text, L'\n');
 870             } else
 871                 e = 0;
 872         }
 873     } else
 874         e = indent + extraindent;
 875
 876     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
 877                          &text->charset, 0);
 878     for (p = wrapping; p; p = p->next) {
 879         for (i = 0; i < e; i++)
 880             info_rdadd(text, L' ');
 881         info_rdaddwc(text, p->begin, p->end, TRUE);
 882         info_rdadd(text, L'\n');
 883         e = indent + extraindent;
 884     }
 885     wrap_free(wrapping);
 886     info_rdadd(text, L'\n');
 887
 888     free_word_list(words);
 889 }
 890
 891 static void info_codepara(info_data *text, word *words,
 892                           int indent, int width) {
 893     int i;
 894
 895     for (; words; words = words->next) if (words->type == word_WeakCode) {
 896         for (i = 0; i < indent; i++)
 897             info_rdadd(text, L' ');
 898         if (info_rdadds(text, words->text) > width) {
 899             /* FIXME: warn */
 900         }
 901         info_rdadd(text, L'\n');
 902     }
 903
 904     info_rdadd(text, L'\n');
 905 }
 906
 907 static void info_versionid(info_data *text, word *words) {
 908     info_rdadd(text, L'[');                    /* FIXME: configurability */
 909     info_rdaddwc(text, words, NULL, FALSE);
 910     info_rdadds(text, L"]\n");
 911 }
 912
 913 static node *info_node_new(char *name, int charset)
 914 {
 915     node *n;
 916
 917     n = mknew(node);
 918     n->text = empty_info_data;
 919     n->text.charset = charset;
 920     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
 921     n->name = dupstr(name);
 922     n->started_menu = FALSE;
 923
 924     return n;
 925 }
 926
 927 static char *info_node_name(paragraph *par, int charset)
 928 {
 929     info_data id = EMPTY_INFO_DATA;
 930     char *p, *q;
 931
 932     id.charset = charset;
 933     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words, NULL, FALSE);
 934     info_rdaddsc(&id, NULL);
 935
 936     /*
 937      * We cannot have commas or colons in a node name. Remove any
 938      * that we find, with a warning.
 939      */
 940     p = q = id.output.text;
 941     while (*p) {
 942         if (*p == ':' || *p == ',') {
 943             error(err_infonodechar, &par->fpos, *p);
 944         } else {
 945             *q++ = *p;
 946         }
 947         p++;
 948     }
 949     *p = '\0';
 950
 951     return id.output.text;
 952 }
 953
 954 static void info_menu_item(info_data *text, node *n, paragraph *p)
 955 {
 956     /*
 957      * FIXME: Depending on how we're doing node names in this info
 958      * file, we might want to do
 959      *
 960      *   * Node name:: Chapter title
 961      *
 962      * _or_
 963      *
 964      *   * Chapter number: Node name.
 965      *
 966      * This function mostly works in char rather than wchar_t,
 967      * because a menu item is a structural component.
 968      */
 969     info_rdaddsc(text, "* ");
 970     info_rdaddsc(text, n->name);
 971     info_rdaddsc(text, "::");
 972     if (p) {
 973         info_rdaddc(text, ' ');
 974         info_rdaddwc(text, p->words, NULL, FALSE);
 975     }
 976     info_rdaddc(text, '\n');
 977 }
 978
 979 /*
 980  * These functions implement my wrapper on the rdadd* calls which
 981  * allows me to switch arbitrarily between literal octet-string
 982  * text and charset-translated Unicode. (Because no matter what
 983  * character set I write the actual text in, I expect info readers
 984  * to treat node names and file names literally and to expect
 985  * keywords like `*Note' in their canonical form, so I have to take
 986  * steps to ensure that those structural elements of the file
 987  * aren't messed with.)
 988  */
 989 static int info_rdadds(info_data *d, wchar_t const *wcs)
 990 {
 991     if (!d->wcmode) {
 992         d->state = charset_init_state;
 993         d->wcmode = TRUE;
 994     }
 995
 996     if (wcs) {
 997         char buf[256];
 998         int len, origlen, ret;
 999
1000         origlen = len = ustrlen(wcs);
1001         while (len > 0) {
1002             int prevlen = len;
1003
1004             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1005                                        d->charset, &d->state, NULL);
1006
1007             assert(len < prevlen);
1008
1009             if (ret > 0) {
1010                 buf[ret] = '\0';
1011                 rdaddsc(&d->output, buf);
1012             }
1013         }
1014
1015         return origlen;
1016     } else
1017         return 0;
1018 }
1019
1020 static int info_rdaddsc(info_data *d, char const *cs)
1021 {
1022     if (d->wcmode) {
1023         char buf[256];
1024         int ret;
1025
1026         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1027                                    d->charset, &d->state, NULL);
1028         if (ret > 0) {
1029             buf[ret] = '\0';
1030             rdaddsc(&d->output, buf);
1031         }
1032
1033         d->wcmode = FALSE;
1034     }
1035
1036     if (cs) {
1037         rdaddsc(&d->output, cs);
1038         return strlen(cs);
1039     } else
1040         return 0;
1041 }
1042
1043 static int info_rdadd(info_data *d, wchar_t wc)
1044 {
1045     wchar_t wcs[2];
1046     wcs[0] = wc;
1047     wcs[1] = L'\0';
1048     return info_rdadds(d, wcs);
1049 }
1050
1051 static int info_rdaddc(info_data *d, char c)
1052 {
1053     char cs[2];
1054     cs[0] = c;
1055     cs[1] = '\0';
1056     return info_rdaddsc(d, cs);
1057 }