mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable indentation, bullets, emphasis, quotes etc?
   7  *
   8  *  - configurable choice of how to allocate node names?
   9  *     + possibly a template-like approach, choosing node names to
  10  *       be the full section title or perhaps the internal keyword?
  11  *     + neither of those seems quite right. Perhaps instead a
  12  *       Windows Help-like mechanism, where a magic config
  13  *       directive allows user choice of name for every node.
  14  *     + Only trouble with that is, now what happens to the section
  15  *       numbers? Do they become completely vestigial and just sit
  16  *       in the title text of each node? Or do we keep them in the
  17  *       menus somehow? I think people might occasionally want to
  18  *       go to a section by number, if only because all the _other_
  19  *       formats of the same document will reference the numbers
  20  *       all the time. So our menu lines could look like one of
  21  *       these:
  22  *        * Nodename: Section 1.2. Title of section.
  23  *        * Section 1.2: Nodename. Title of section.
  24  *
  25  *  - might be helpful to diagnose duplicate node names!
  26  */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <assert.h>
  31 #include "halibut.h"
  32
  33 typedef struct {                   /* Info backend settings, as filled in by info_configure() */
  34     char *filename;                /* name of the primary output file; defaults to "output.info" */
  35     int maxfilesize;               /* size threshold for splitting into sub-files; defaults to 64 << 10, 0 means a single file */
  36     int charset;                   /* output character set identifier; defaults to CS_ASCII */
  37 } infoconfig;
  38
  39 typedef struct {                   /* a growing buffer of Info output plus the state needed to append to it */
  40     rdstringc output;              /* accumulated text; callers in this file read output.text and output.pos */
  41     int charset;                   /* character set identifier, copied from the configuration */
  42     charset_state state;           /* starts as CHARSET_INIT_STATE; presumably conversion state for the info_rdadd* helpers - confirm */
  43     int wcmode;                    /* starts FALSE; NOTE(review): meaning not visible in this part of the file */
  44 } info_data;
  45 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE } /* initialiser for an empty info_data; charset must still be set by the user */
  46 static const info_data empty_info_data = EMPTY_INFO_DATA; /* assignable copy of the empty value, used to reset an existing info_data */
  47
  48 typedef struct node_tag node;      /* one Info node: the Top node, a document section, or the Index */
  49 struct node_tag {
  50     node *listnext;                /* next node in creation order; this chain is walked to finalise and write all nodes */
  51     node *up, *prev, *next, *lastchild; /* up/prev/next are printed in the node header line; lastchild is only set to NULL in the visible code */
  52     int pos, started_menu, filenum; /* pos: offset used in the tag table; started_menu: "* Menu:" already emitted; filenum: sub-file number */
  53     char *name;                    /* node name, a private copy made by info_node_new() */
  54     info_data text;                /* body text of the node, later rebuilt with the header line in front */
  55 };
  56
  57 typedef struct {                   /* per-index-entry data, stored in indexentry->backend_data */
  58     char *text;                    /* display form of the index term, as rendered by info_rdaddwc() */
  59     int length;                    /* value info_rdaddwc() returned for that text; used to pad the index column */
  60     int nnodes, nodesize;          /* number of slots in use / allocated in nodes[]; grown 32 at a time */
  61     node **nodes;                  /* nodes in which this index term is referenced */
  62 } info_idx;
  63
  64 static int info_rdadd(info_data *, wchar_t);             /* append one wide character; callers add the result to a running length */
  65 static int info_rdadds(info_data *, wchar_t const *);    /* append a wide string */
  66 static int info_rdaddc(info_data *, char);               /* append one plain char */
  67 static int info_rdaddsc(info_data *, char const *);      /* append a plain char string */
  68
  69 static void info_heading(info_data *, word *, word *, int);   /* emit an underlined heading */
  70 static void info_rule(info_data *, int, int);                 /* emit an indented horizontal rule */
  71 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  72                       int, int, int);                         /* emit a wrapped paragraph with optional prefix */
  73 static void info_codepara(info_data *, word *, int, int);     /* emit a code paragraph line by line */
  74 static void info_versionid(info_data *, word *);              /* emit a version ID in square brackets */
  75 static void info_menu_item(info_data *, node *, paragraph *); /* defined outside this view; called to add a menu line for a node */
  76 static word *info_transform_wordlist(word *, keywordlist *);  /* copy a word list, rewriting section xrefs */
  77 static int info_check_index(word *, node *, indexdata *);     /* record index references found in a word list */
  78
  79 static int info_rdaddwc(info_data *, word *, word *, int);    /* render a run of words into an info_data */
  80
  81 static node *info_node_new(char *name, int charset);          /* allocate a node with a copy of name */
  82 static char *info_node_name(paragraph *p, int charset);       /* build a node name for a section paragraph; info_backend frees the result */
  83
  84 static infoconfig info_configure(paragraph *source) {
  85     infoconfig ret;
  86
  87     /*
  88      * Defaults.
  89      */
  90     ret.filename = dupstr("output.info");
  91     ret.maxfilesize = 64 << 10;
  92     ret.charset = CS_ASCII;
  93
  94     for (; source; source = source->next) {
  95         if (source->type == para_Config) {
  96             if (!ustricmp(source->keyword, L"info-filename")) {
  97                 sfree(ret.filename);
  98                 ret.filename = dupstr(adv(source->origkeyword));
  99             } else if (!ustricmp(source->keyword, L"info-charset")) {
 100                 char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
 101                 ret.charset = charset_from_localenc(csname);
 102                 sfree(csname);
 103             } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
 104                 ret.maxfilesize = utoi(uadv(source->keyword));
 105             }
 106         }
 107     }
 108
 109     return ret;
 110 }
 111
 112 paragraph *info_config_filename(char *filename)
 113 {
 114     return cmdline_cfg_simple("info-filename", filename, NULL);
 115 }
 116
 117 void info_backend(paragraph *sourceform, keywordlist *keywords,
 118                   indexdata *idx, void *unused) {
 119     paragraph *p;
 120     infoconfig conf;
 121     word *prefix, *body, *wp;
 122     word spaceword;
 123     wchar_t *prefixextra;
 124     int nesting, nestindent;
 125     int indentb, indenta;
 126     int filepos;
 127     int has_index;
 128     info_data intro_text = EMPTY_INFO_DATA;
 129     node *topnode, *currnode;
 130     word bullet;
 131     FILE *fp;
 132
 133     /*
 134      * FIXME: possibly configurability?
 135      */
 136     int width = 70, listindentbefore = 1, listindentafter = 3;
 137     int indent_code = 2, index_width = 40;
 138
 139     IGNORE(unused);
 140
 141     conf = info_configure(sourceform);
 142
 143     /*
 144      * Go through and create a node for each section.
 145      */
 146     topnode = info_node_new("Top", conf.charset);
 147     currnode = topnode;
 148     for (p = sourceform; p; p = p->next) switch (p->type) {
 149         /*
 150          * Chapter titles.
 151          */
 152       case para_Chapter:
 153       case para_Appendix:
 154       case para_UnnumberedChapter:
 155       case para_Heading:
 156       case para_Subsect:
 157         {
 158             node *newnode, *upnode;
 159             char *nodename;
 160
 161             nodename = info_node_name(p, conf.charset);
 162             newnode = info_node_new(nodename, conf.charset);
 163             sfree(nodename);
 164
 165             p->private_data = newnode;
 166
 167             if (p->parent)
 168                 upnode = (node *)p->parent->private_data;
 169             else
 170                 upnode = topnode;
 171             assert(upnode);
 172             newnode->up = upnode;
 173
 174             currnode->next = newnode;
 175             newnode->prev = currnode;
 176
 177             currnode->listnext = newnode;
 178             currnode = newnode;
 179         }
 180         break;
 181     }
 182
 183     /*
 184      * Set up the display form of each index entry.
 185      */
 186     {
 187         int i;
 188         indexentry *entry;
 189
 190         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 191             info_idx *ii = mknew(info_idx);
 192             info_data id = EMPTY_INFO_DATA;
 193
 194             id.charset = conf.charset;
 195
 196             ii->nnodes = ii->nodesize = 0;
 197             ii->nodes = NULL;
 198
 199             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE);
 200
 201             ii->text = id.output.text;
 202
 203             entry->backend_data = ii;
 204         }
 205     }
 206
 207     /*
 208      * An Info file begins with a piece of introductory text which
 209      * is apparently never shown anywhere. This seems to me to be a
 210      * good place to put the copyright notice and the version IDs.
 211      * Also, Info directory entries are expected to go here.
 212      */
 213     intro_text.charset = conf.charset;
 214
 215     info_rdaddsc(&intro_text,
 216             "This Info file generated by Halibut, ");
 217     info_rdaddsc(&intro_text, version);
 218     info_rdaddsc(&intro_text, "\n\n");
 219
 220     for (p = sourceform; p; p = p->next)
 221         if (p->type == para_Config &&
 222             !ustricmp(p->keyword, L"info-dir-entry")) {
 223             wchar_t *section, *shortname, *longname, *kw;
 224             char *s;
 225
 226             section = uadv(p->keyword);
 227             shortname = *section ? uadv(section) : NULL;
 228             longname = *shortname ? uadv(shortname) : NULL;
 229             kw = *longname ? uadv(longname) : NULL;
 230
 231             if (!*longname) {
 232                 error(err_infodirentry, &p->fpos);
 233                 continue;
 234             }
 235
 236             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 237             info_rdadds(&intro_text, section);
 238             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 239             info_rdadds(&intro_text, shortname);
 240             info_rdaddsc(&intro_text, ": (");
 241             s = dupstr(conf.filename);
 242             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 243                 s[strlen(s)-5] = '\0';
 244             info_rdaddsc(&intro_text, s);
 245             sfree(s);
 246             info_rdaddsc(&intro_text, ")");
 247             if (*kw) {
 248                 keyword *kwl = kw_lookup(keywords, kw);
 249                 if (kwl && kwl->para->private_data) {
 250                     node *n = (node *)kwl->para->private_data;
 251                     info_rdaddsc(&intro_text, n->name);
 252                 }
 253             }
 254             info_rdaddsc(&intro_text, ".   ");
 255             info_rdadds(&intro_text, longname);
 256             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 257         }
 258
 259     for (p = sourceform; p; p = p->next)
 260         if (p->type == para_Copyright)
 261             info_para(&intro_text, NULL, NULL, p->words, keywords,
 262                       0, 0, width);
 263
 264     for (p = sourceform; p; p = p->next)
 265         if (p->type == para_VersionID)
 266             info_versionid(&intro_text, p->words);
 267
 268     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 269         info_rdaddc(&intro_text, '\n');
 270
 271     /* Do the title */
 272     for (p = sourceform; p; p = p->next)
 273         if (p->type == para_Title)
 274             info_heading(&topnode->text, NULL, p->words, width);
 275
 276     nestindent = listindentbefore + listindentafter;
 277     nesting = 0;
 278
 279     currnode = topnode;
 280
 281     /* Do the main document */
 282     for (p = sourceform; p; p = p->next) switch (p->type) {
 283
 284       case para_QuotePush:
 285         nesting += 2;
 286         break;
 287       case para_QuotePop:
 288         nesting -= 2;
 289         assert(nesting >= 0);
 290         break;
 291
 292       case para_LcontPush:
 293         nesting += nestindent;
 294         break;
 295       case para_LcontPop:
 296         nesting -= nestindent;
 297         assert(nesting >= 0);
 298         break;
 299
 300         /*
 301          * Things we ignore because we've already processed them or
 302          * aren't going to touch them in this pass.
 303          */
 304       case para_IM:
 305       case para_BR:
 306       case para_Biblio:                /* only touch BiblioCited */
 307       case para_VersionID:
 308       case para_NoCite:
 309       case para_Title:
 310         break;
 311
 312         /*
 313          * Chapter titles.
 314          */
 315       case para_Chapter:
 316       case para_Appendix:
 317       case para_UnnumberedChapter:
 318       case para_Heading:
 319       case para_Subsect:
 320         currnode = p->private_data;
 321         assert(currnode);
 322         assert(currnode->up);
 323
 324         if (!currnode->up->started_menu) {
 325             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 326             currnode->up->started_menu = TRUE;
 327         }
 328         info_menu_item(&currnode->up->text, currnode, p);
 329
 330         has_index |= info_check_index(p->words, currnode, idx);
 331         info_heading(&currnode->text, p->kwtext, p->words, width);
 332         nesting = 0;
 333         break;
 334
 335       case para_Rule:
 336         info_rule(&currnode->text, nesting, width - nesting);
 337         break;
 338
 339       case para_Normal:
 340       case para_Copyright:
 341       case para_DescribedThing:
 342       case para_Description:
 343       case para_BiblioCited:
 344       case para_Bullet:
 345       case para_NumberedList:
 346         has_index |= info_check_index(p->words, currnode, idx);
 347         if (p->type == para_Bullet) {
 348             bullet.next = NULL;
 349             bullet.alt = NULL;
 350             bullet.type = word_Normal;
 351             bullet.text = L"-";        /* FIXME: configurability */
 352             prefix = &bullet;
 353             prefixextra = NULL;
 354             indentb = listindentbefore;
 355             indenta = listindentafter;
 356         } else if (p->type == para_NumberedList) {
 357             prefix = p->kwtext;
 358             prefixextra = L".";        /* FIXME: configurability */
 359             indentb = listindentbefore;
 360             indenta = listindentafter;
 361         } else if (p->type == para_Description) {
 362             prefix = NULL;
 363             prefixextra = NULL;
 364             indentb = listindentbefore;
 365             indenta = listindentafter;
 366         } else {
 367             prefix = NULL;
 368             prefixextra = NULL;
 369             indentb = indenta = 0;
 370         }
 371         if (p->type == para_BiblioCited) {
 372             body = dup_word_list(p->kwtext);
 373             for (wp = body; wp->next; wp = wp->next);
 374             wp->next = &spaceword;
 375             spaceword.next = p->words;
 376             spaceword.alt = NULL;
 377             spaceword.type = word_WhiteSpace;
 378             spaceword.text = NULL;
 379         } else {
 380             wp = NULL;
 381             body = p->words;
 382         }
 383         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 384                   nesting + indentb, indenta,
 385                   width - nesting - indentb - indenta);
 386         if (wp) {
 387             wp->next = NULL;
 388             free_word_list(body);
 389         }
 390         break;
 391
 392       case para_Code:
 393         info_codepara(&currnode->text, p->words,
 394                       nesting + indent_code,
 395                       width - nesting - 2 * indent_code);
 396         break;
 397     }
 398
 399     /*
 400      * Create an index node if required.
 401      */
 402     if (has_index) {
 403         node *newnode;
 404         int i, j, k;
 405         indexentry *entry;
 406
 407         newnode = info_node_new("Index", conf.charset);
 408         newnode->up = topnode;
 409
 410         currnode->next = newnode;
 411         newnode->prev = currnode;
 412         currnode->listnext = newnode;
 413
 414         info_rdaddsc(&newnode->text, "Index\n-----\n\n");
 415
 416         info_menu_item(&topnode->text, newnode, NULL);
 417
 418         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 419             info_idx *ii = (info_idx *)entry->backend_data;
 420
 421             for (j = 0; j < ii->nnodes; j++) {
 422                 /*
 423                  * When we have multiple references for a single
 424                  * index term, we only display the actual term on
 425                  * the first line, to make it clear that the terms
 426                  * really are the same.
 427                  */
 428                 if (j == 0)
 429                     info_rdaddsc(&newnode->text, ii->text);
 430                 for (k = (j ? 0 : ii->length); k < index_width; k++)
 431                     info_rdaddc(&newnode->text, ' ');
 432                 info_rdaddsc(&newnode->text, "   *Note ");
 433                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 434                 info_rdaddsc(&newnode->text, "::\n");
 435             }
 436         }
 437     }
 438
 439     /*
 440      * Finalise the text of each node, by adding the ^_ delimiter
 441      * and the node line at the top.
 442      */
 443     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 444         char *origtext = currnode->text.output.text;
 445         currnode->text = empty_info_data;
 446         currnode->text.charset = conf.charset;
 447         info_rdaddsc(&currnode->text, "\037\nFile: ");
 448         info_rdaddsc(&currnode->text, conf.filename);
 449         info_rdaddsc(&currnode->text, ",  Node: ");
 450         info_rdaddsc(&currnode->text, currnode->name);
 451         if (currnode->prev) {
 452             info_rdaddsc(&currnode->text, ",  Prev: ");
 453             info_rdaddsc(&currnode->text, currnode->prev->name);
 454         }
 455         info_rdaddsc(&currnode->text, ",  Up: ");
 456         info_rdaddsc(&currnode->text, (currnode->up ?
 457                                        currnode->up->name : "(dir)"));
 458         if (currnode->next) {
 459             info_rdaddsc(&currnode->text, ",  Next: ");
 460             info_rdaddsc(&currnode->text, currnode->next->name);
 461         }
 462         info_rdaddsc(&currnode->text, "\n\n");
 463         info_rdaddsc(&currnode->text, origtext);
 464         /*
 465          * Just make _absolutely_ sure we end with a newline.
 466          */
 467         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 468             info_rdaddc(&currnode->text, '\n');
 469
 470         sfree(origtext);
 471     }
 472
 473     /*
 474      * Compute the offsets for the tag table.
 475      */
 476     filepos = intro_text.output.pos;
 477     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 478         currnode->pos = filepos;
 479         filepos += currnode->text.output.pos;
 480     }
 481
 482     /*
 483      * Split into sub-files.
 484      */
 485     if (conf.maxfilesize > 0) {
 486         int currfilesize = intro_text.output.pos, currfilenum = 1;
 487         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 488             if (currfilesize > intro_text.output.pos &&
 489                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 490                 currfilenum++;
 491                 currfilesize = intro_text.output.pos;
 492             }
 493             currnode->filenum = currfilenum;
 494             currfilesize += currnode->text.output.pos;
 495         }
 496     }
 497
 498     /*
 499      * Write the primary output file.
 500      */
 501     fp = fopen(conf.filename, "w");
 502     if (!fp) {
 503         error(err_cantopenw, conf.filename);
 504         return;
 505     }
 506     fputs(intro_text.output.text, fp);
 507     if (conf.maxfilesize == 0) {
 508         for (currnode = topnode; currnode; currnode = currnode->listnext)
 509             fputs(currnode->text.output.text, fp);
 510     } else {
 511         int filenum = 0;
 512         fprintf(fp, "\037\nIndirect:\n");
 513         for (currnode = topnode; currnode; currnode = currnode->listnext)
 514             if (filenum != currnode->filenum) {
 515                 filenum = currnode->filenum;
 516                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 517                         currnode->pos);
 518             }
 519     }
 520     fprintf(fp, "\037\nTag Table:\n");
 521     if (conf.maxfilesize > 0)
 522         fprintf(fp, "(Indirect)\n");
 523     for (currnode = topnode; currnode; currnode = currnode->listnext)
 524         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 525     fprintf(fp, "\037\nEnd Tag Table\n");
 526     fclose(fp);
 527
 528     /*
 529      * Write the subfiles.
 530      */
 531     if (conf.maxfilesize > 0) {
 532         int filenum = 0;
 533         fp = NULL;
 534
 535         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 536             if (filenum != currnode->filenum) {
 537                 char *fname;
 538
 539                 filenum = currnode->filenum;
 540
 541                 if (fp)
 542                     fclose(fp);
 543                 fname = mknewa(char, strlen(conf.filename) + 40);
 544                 sprintf(fname, "%s-%d", conf.filename, filenum);
 545                 fp = fopen(fname, "w");
 546                 if (!fp) {
 547                     error(err_cantopenw, fname);
 548                     return;
 549                 }
 550                 sfree(fname);
 551                 fputs(intro_text.output.text, fp);
 552             }
 553             fputs(currnode->text.output.text, fp);
 554         }
 555
 556         if (fp)
 557             fclose(fp);
 558     }
 559 }
 560
 561 static int info_check_index(word *w, node *n, indexdata *idx)
 562 {
 563     int ret = 0;
 564
 565     for (; w; w = w->next) {
 566         if (w->type == word_IndexRef) {
 567             indextag *tag;
 568             int i;
 569
 570             tag = index_findtag(idx, w->text);
 571             if (!tag)
 572                 break;
 573
 574             for (i = 0; i < tag->nrefs; i++) {
 575                 indexentry *entry = tag->refs[i];
 576                 info_idx *ii = (info_idx *)entry->backend_data;
 577
 578                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 579                     /*
 580                      * If the same index term is indexed twice
 581                      * within the same section, we only want to
 582                      * mention it once in the index. So do nothing
 583                      * here.
 584                      */
 585                     continue;
 586                 }
 587
 588                 if (ii->nnodes >= ii->nodesize) {
 589                     ii->nodesize += 32;
 590                     ii->nodes = resize(ii->nodes, ii->nodesize);
 591                 }
 592
 593                 ii->nodes[ii->nnodes++] = n;
 594
 595                 ret = 1;
 596             }
 597         }
 598     }
 599
 600     return ret;
 601 }
 602
 603 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 604 {
 605     word *ret = dup_word_list(words);
 606     word *w;
 607     keyword *kwl;
 608
 609     for (w = ret; w; w = w->next) {
 610         w->private_data = NULL;
 611         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 612             kwl = kw_lookup(keywords, w->text);
 613             if (kwl) {
 614                 if (kwl->para->type == para_NumberedList ||
 615                     kwl->para->type == para_BiblioCited) {
 616                     /*
 617                      * In Info, we do nothing special for xrefs to
 618                      * numbered list items or bibliography entries.
 619                      */
 620                     break;
 621                 } else {
 622                     /*
 623                      * An xref to a different section has its text
 624                      * completely replaced.
 625                      */
 626                     word *w2, *w3, *w4;
 627                     w2 = w3 = w->next;
 628                     w4 = NULL;
 629                     while (w2) {
 630                         if (w2->type == word_XrefEnd) {
 631                             w4 = w2->next;
 632                             w2->next = NULL;
 633                             break;
 634                         }
 635                         w2 = w2->next;
 636                     }
 637                     free_word_list(w3);
 638
 639                     /*
 640                      * Now w is the UpperXref / LowerXref we
 641                      * started with, and w4 is the next word after
 642                      * the corresponding XrefEnd (if any). The
 643                      * simplest thing is just to stick a pointer to
 644                      * the target node structure in the private
 645                      * data field of the xref word, and let
 646                      * info_rdaddwc and friends read the node name
 647                      * out from there.
 648                      */
 649                     w->next = w4;
 650                     w->private_data = kwl->para->private_data;
 651                     assert(w->private_data);
 652                 }
 653             }
 654         }
 655     }
 656
 657     return ret;
 658 }
 659
 660 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs) {
 661     int ret = 0;
 662
 663     for (; words && words != end; words = words->next) switch (words->type) {
 664       case word_HyperLink:
 665       case word_HyperEnd:
 666       case word_XrefEnd:
 667       case word_IndexRef:
 668         break;
 669
 670       case word_Normal:
 671       case word_Emph:
 672       case word_Code:
 673       case word_WeakCode:
 674       case word_WhiteSpace:
 675       case word_EmphSpace:
 676       case word_CodeSpace:
 677       case word_WkCodeSpace:
 678       case word_Quote:
 679       case word_EmphQuote:
 680       case word_CodeQuote:
 681       case word_WkCodeQuote:
 682         assert(words->type != word_CodeQuote &&
 683                words->type != word_WkCodeQuote);
 684         if (towordstyle(words->type) == word_Emph &&
 685             (attraux(words->aux) == attr_First ||
 686              attraux(words->aux) == attr_Only))
 687             ret += info_rdadd(id, L'_');      /* FIXME: configurability */
 688         else if (towordstyle(words->type) == word_Code &&
 689                  (attraux(words->aux) == attr_First ||
 690                   attraux(words->aux) == attr_Only))
 691             ret += info_rdadd(id, L'`');      /* FIXME: configurability */
 692         if (removeattr(words->type) == word_Normal) {
 693             if (cvt_ok(id->charset, words->text) || !words->alt)
 694                 ret += info_rdadds(id, words->text);
 695             else
 696                 ret += info_rdaddwc(id, words->alt, NULL, FALSE);
 697         } else if (removeattr(words->type) == word_WhiteSpace) {
 698             ret += info_rdadd(id, L' ');
 699         } else if (removeattr(words->type) == word_Quote) {
 700             ret += info_rdadd(id, quoteaux(words->aux) == quote_Open ? L'`' : L'\'');
 701                                        /* FIXME: configurability */
 702         }
 703         if (towordstyle(words->type) == word_Emph &&
 704             (attraux(words->aux) == attr_Last ||
 705              attraux(words->aux) == attr_Only))
 706             ret += info_rdadd(id, L'_');     /* FIXME: configurability */
 707         else if (towordstyle(words->type) == word_Code &&
 708                  (attraux(words->aux) == attr_Last ||
 709                   attraux(words->aux) == attr_Only))
 710             ret += info_rdadd(id, L'\'');     /* FIXME: configurability */
 711         break;
 712
 713       case word_UpperXref:
 714       case word_LowerXref:
 715         if (xrefs && words->private_data) {
 716             /*
 717              * This bit is structural and so must be done in char
 718              * rather than wchar_t.
 719              */
 720             ret += info_rdaddsc(id, "*Note ");
 721             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 722             ret += info_rdaddsc(id, "::");
 723         }
 724         break;
 725     }
 726
 727     return ret;
 728 }
 729
 730 static int info_width_internal(word *words, int xrefs, int charset);
 731
 732 static int info_width_internal_list(word *words, int xrefs, int charset) {
 733     int w = 0;
 734     while (words) {
 735         w += info_width_internal(words, xrefs, charset);
 736         words = words->next;
 737     }
 738     return w;
 739 }
 740
 741 static int info_width_internal(word *words, int xrefs, int charset) {
 742     switch (words->type) {
 743       case word_HyperLink:
 744       case word_HyperEnd:
 745       case word_XrefEnd:
 746       case word_IndexRef:
 747         return 0;
 748
 749       case word_Normal:
 750       case word_Emph:
 751       case word_Code:
 752       case word_WeakCode:
 753         return (((words->type == word_Emph ||
 754                   words->type == word_Code)
 755                  ? (attraux(words->aux) == attr_Only ? 2 :
 756                     attraux(words->aux) == attr_Always ? 0 : 1)
 757                  : 0) +
 758                 (cvt_ok(charset, words->text) || !words->alt ?
 759                  ustrwid(words->text, charset) :
 760                  info_width_internal_list(words->alt, xrefs, charset)));
 761
 762       case word_WhiteSpace:
 763       case word_EmphSpace:
 764       case word_CodeSpace:
 765       case word_WkCodeSpace:
 766       case word_Quote:
 767       case word_EmphQuote:
 768       case word_CodeQuote:
 769       case word_WkCodeQuote:
 770         assert(words->type != word_CodeQuote &&
 771                words->type != word_WkCodeQuote);
 772         return (((towordstyle(words->type) == word_Emph ||
 773                   towordstyle(words->type) == word_Code)
 774                  ? (attraux(words->aux) == attr_Only ? 2 :
 775                     attraux(words->aux) == attr_Always ? 0 : 1)
 776                  : 0) + 1);
 777
 778       case word_UpperXref:
 779       case word_LowerXref:
 780         if (xrefs && words->private_data) {
 781             /* "*Note " plus "::" comes to 8 characters */
 782             return 8 + strwid(((node *)words->private_data)->name, charset);
 783         }
 784         break;
 785     }
 786     return 0;                          /* should never happen */
 787 }
 788
 789 static int info_width_noxrefs(void *ctx, word *words)
 790 {
 791     return info_width_internal(words, FALSE, *(int *)ctx);
 792 }
 793 static int info_width_xrefs(void *ctx, word *words)
 794 {
 795     return info_width_internal(words, TRUE, *(int *)ctx);
 796 }
 797
 798 static void info_heading(info_data *text, word *tprefix,
 799                          word *words, int width) {
 800     int length;
 801     int firstlinewidth, wrapwidth;
 802     wrappedline *wrapping, *p;
 803
 804     length = 0;
 805     if (tprefix) {
 806         length += info_rdaddwc(text, tprefix, NULL, FALSE);
 807         length += info_rdadds(text, L": ");/* FIXME: configurability */
 808     }
 809
 810     wrapwidth = width;
 811     firstlinewidth = width - length;
 812
 813     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
 814                          info_width_noxrefs, &text->charset, 0);
 815     for (p = wrapping; p; p = p->next) {
 816         length += info_rdaddwc(text, p->begin, p->end, FALSE);
 817         info_rdadd(text, L'\n');
 818         while (length--)
 819             info_rdadd(text, L'-');  /* FIXME: configurability */
 820         info_rdadd(text, L'\n');
 821         length = 0;
 822     }
 823     wrap_free(wrapping);
 824     info_rdadd(text, L'\n');
 825 }
 826
 827 static void info_rule(info_data *text, int indent, int width) {
 828     while (indent--) info_rdadd(text, L' ');
 829     while (width--) info_rdadd(text, L'-');
 830     info_rdadd(text, L'\n');
 831     info_rdadd(text, L'\n');
 832 }
 833
 834 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 835                       word *input, keywordlist *keywords,
 836                       int indent, int extraindent, int width) {
 837     wrappedline *wrapping, *p;
 838     word *words;
 839     int e;
 840     int i;
 841     int firstlinewidth = width;
 842
 843     words = info_transform_wordlist(input, keywords);
 844
 845     if (prefix) {
 846         for (i = 0; i < indent; i++)
 847             info_rdadd(text, L' ');
 848         e = info_rdaddwc(text, prefix, NULL, FALSE);
 849         if (prefixextra)
 850             e += info_rdadds(text, prefixextra);
 851         /* If the prefix is too long, shorten the first line to fit. */
 852         e = extraindent - e;
 853         if (e < 0) {
 854             firstlinewidth += e;       /* this decreases it, since e < 0 */
 855             if (firstlinewidth < 0) {
 856                 e = indent + extraindent;
 857                 firstlinewidth = width;
 858                 info_rdadd(text, L'\n');
 859             } else
 860                 e = 0;
 861         }
 862     } else
 863         e = indent + extraindent;
 864
 865     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
 866                          &text->charset, 0);
 867     for (p = wrapping; p; p = p->next) {
 868         for (i = 0; i < e; i++)
 869             info_rdadd(text, L' ');
 870         info_rdaddwc(text, p->begin, p->end, TRUE);
 871         info_rdadd(text, L'\n');
 872         e = indent + extraindent;
 873     }
 874     wrap_free(wrapping);
 875     info_rdadd(text, L'\n');
 876
 877     free_word_list(words);
 878 }
 879
 880 static void info_codepara(info_data *text, word *words,
 881                           int indent, int width) {
 882     int i;
 883
 884     for (; words; words = words->next) if (words->type == word_WeakCode) {
 885         for (i = 0; i < indent; i++)
 886             info_rdadd(text, L' ');
 887         if (info_rdadds(text, words->text) > width) {
 888             /* FIXME: warn */
 889         }
 890         info_rdadd(text, L'\n');
 891     }
 892
 893     info_rdadd(text, L'\n');
 894 }
 895
 896 static void info_versionid(info_data *text, word *words) {
 897     info_rdadd(text, L'[');                    /* FIXME: configurability */
 898     info_rdaddwc(text, words, NULL, FALSE);
 899     info_rdadds(text, L"]\n");
 900 }
 901
 902 static node *info_node_new(char *name, int charset)
 903 {
 904     node *n;
 905
 906     n = mknew(node);
 907     n->text = empty_info_data;
 908     n->text.charset = charset;
 909     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
 910     n->name = dupstr(name);
 911     n->started_menu = FALSE;
 912
 913     return n;
 914 }
 915
 916 static char *info_node_name(paragraph *par, int charset)
 917 {
 918     info_data id = EMPTY_INFO_DATA;
 919     char *p, *q;
 920
 921     id.charset = charset;
 922     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words, NULL, FALSE);
 923     info_rdaddsc(&id, NULL);
 924
 925     /*
 926      * We cannot have commas or colons in a node name. Remove any
 927      * that we find, with a warning.
 928      */
 929     p = q = id.output.text;
 930     while (*p) {
 931         if (*p == ':' || *p == ',') {
 932             error(err_infonodechar, &par->fpos, *p);
 933         } else {
 934             *q++ = *p;
 935         }
 936         p++;
 937     }
 938     *p = '\0';
 939
 940     return id.output.text;
 941 }
 942
 943 static void info_menu_item(info_data *text, node *n, paragraph *p)
 944 {
 945     /*
 946      * FIXME: Depending on how we're doing node names in this info
 947      * file, we might want to do
 948      *
 949      *   * Node name:: Chapter title
 950      *
 951      * _or_
 952      *
 953      *   * Chapter number: Node name.
 954      *
 955      * This function mostly works in char rather than wchar_t,
 956      * because a menu item is a structural component.
 957      */
 958     info_rdaddsc(text, "* ");
 959     info_rdaddsc(text, n->name);
 960     info_rdaddsc(text, "::");
 961     if (p) {
 962         info_rdaddc(text, ' ');
 963         info_rdaddwc(text, p->words, NULL, FALSE);
 964     }
 965     info_rdaddc(text, '\n');
 966 }
 967
 968 /*
 969  * These functions implement my wrapper on the rdadd* calls which
 970  * allows me to switch arbitrarily between literal octet-string
 971  * text and charset-translated Unicode. (Because no matter what
 972  * character set I write the actual text in, I expect info readers
 973  * to treat node names and file names literally and to expect
 974  * keywords like `*Note' in their canonical form, so I have to take
 975  * steps to ensure that those structural elements of the file
 976  * aren't messed with.)
 977  */
 978 static int info_rdadds(info_data *d, wchar_t const *wcs)
 979 {
 980     if (!d->wcmode) {
 981         d->state = charset_init_state;
 982         d->wcmode = TRUE;
 983     }
 984
 985     if (wcs) {
 986         char buf[256];
 987         int len, width, ret;
 988
 989         width = ustrwid(wcs, d->charset);
 990
 991         len = ustrlen(wcs);
 992         while (len > 0) {
 993             int prevlen = len;
 994
 995             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
 996                                        d->charset, &d->state, NULL);
 997
 998             assert(len < prevlen);
 999
1000             if (ret > 0) {
1001                 buf[ret] = '\0';
1002                 rdaddsc(&d->output, buf);
1003             }
1004         }
1005
1006         return width;
1007     } else
1008         return 0;
1009 }
1010
1011 static int info_rdaddsc(info_data *d, char const *cs)
1012 {
1013     if (d->wcmode) {
1014         char buf[256];
1015         int ret;
1016
1017         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1018                                    d->charset, &d->state, NULL);
1019         if (ret > 0) {
1020             buf[ret] = '\0';
1021             rdaddsc(&d->output, buf);
1022         }
1023
1024         d->wcmode = FALSE;
1025     }
1026
1027     if (cs) {
1028         rdaddsc(&d->output, cs);
1029         return strlen(cs);
1030     } else
1031         return 0;
1032 }
1033
1034 static int info_rdadd(info_data *d, wchar_t wc)
1035 {
1036     wchar_t wcs[2];
1037     wcs[0] = wc;
1038     wcs[1] = L'\0';
1039     return info_rdadds(d, wcs);
1040 }
1041
1042 static int info_rdaddc(info_data *d, char c)
1043 {
1044     char cs[2];
1045     cs[0] = c;
1046     cs[1] = '\0';
1047     return info_rdaddsc(d, cs);
1048 }