mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_paper.c

   1 /*
   2  * Paper printing pre-backend for Halibut.
   3  *
   4  * This module does all the processing common to both PostScript
   5  * and PDF output: selecting fonts, line wrapping and page breaking
   6  * in accordance with font metrics, laying out the contents and
   7  * index pages, generally doing all the page layout. After this,
   8  * bk_ps.c and bk_pdf.c should only need to do linear translations
   9  * into their literal output format.
  10  */
  11
  12 /*
  13  * To be done:
  14  *
  15  *  - set up contents section now we know what sections begin on
  16  *    which pages
  17  *
  18  *  - do PDF outline
  19  *
  20  *  - index
  21  *
  22  *  - header/footer? Page numbers at least would be handy. Fully
  23  *    configurable footer can wait, though.
  24  *
  25  * That should bring us to the same level of functionality that
  26  * original-Halibut had, and the same in PDF plus the obvious
  27  * interactive navigation features. After that, in future work:
  28  *
  29  *  - linearised PDF, perhaps?
  30  *
  31  *  - I'm uncertain of whether I need to include a ToUnicode CMap
  32  *    in each of my font definitions in PDF. Currently things (by
  33  *    which I mean cut and paste out of acroread) seem to be
  34  *    working fairly happily without it, but I don't know.
  35  *
  36  *  - configurability
  37  *
  38  *  - title pages
  39  */
  40
  41 #include <assert.h>
  42 #include <stdio.h>
  43
  44 #include "halibut.h"
  45 #include "paper.h"
  46
  47 static font_data *make_std_font(font_list *fontlist, char const *name);
  48 static void wrap_paragraph(para_data *pdata, word *words,
  49                            int w, int i1, int i2);
  50 static page_data *page_breaks(line_data *first, line_data *last,
  51                               int page_height);
  52 static void render_line(line_data *ldata, int left_x, int top_y,
  53                         xref_dest *dest, keywordlist *keywords);
  54 static int paper_width_simple(para_data *pdata, word *text);
  55 static void code_paragraph(para_data *pdata,
  56                            font_data *fn, font_data *fi, font_data *fb,
  57                            int font_size, int indent, word *words);
  58 static void rule_paragraph(para_data *pdata, int indent, int height);
  59 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
  60
  61 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
  62                         indexdata *idx) {
  63     paragraph *p;
  64     document *doc;
  65     int indent, extra_indent, firstline_indent, aux_indent;
  66     para_data *pdata;
  67     line_data *ldata, *firstline, *lastline;
  68     font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
  69     page_data *pages;
  70     font_list *fontlist;
  71     word *aux, *aux2;
  72
  73     /*
  74      * FIXME: All these things ought to become configurable.
  75      */
  76     int paper_width = 595 * 4096;
  77     int paper_height = 841 * 4096;
  78     int left_margin = 72 * 4096;
  79     int top_margin = 72 * 4096;
  80     int right_margin = 72 * 4096;
  81     int bottom_margin = 108 * 4096;
  82     int indent_list_bullet = 6 * 4096;
  83     int indent_list = 24 * 4096;
  84     int indent_quote = 18 * 4096;
  85     int base_leading = 4096;
  86     int base_para_spacing = 10 * 4096;
  87     int chapter_top_space = 72 * 4096;
  88     int sect_num_left_space = 12 * 4096;
  89     int chapter_underline_depth = 14 * 4096;
  90     int chapter_underline_thickness = 3 * 4096;
  91     int rule_thickness = 1 * 4096;
  92
  93     int base_width = paper_width - left_margin - right_margin;
  94     int page_height = paper_height - top_margin - bottom_margin;
  95
  96     IGNORE(idx);                       /* FIXME */
  97
  98     /*
  99      * First, set up some font structures.
 100      */
 101     fontlist = mknew(font_list);
 102     fontlist->head = fontlist->tail = NULL;
 103     tr = make_std_font(fontlist, "Times-Roman");
 104     ti = make_std_font(fontlist, "Times-Italic");
 105     hr = make_std_font(fontlist, "Helvetica-Bold");
 106     hi = make_std_font(fontlist, "Helvetica-BoldOblique");
 107     cr = make_std_font(fontlist, "Courier");
 108     co = make_std_font(fontlist, "Courier-Oblique");
 109     cb = make_std_font(fontlist, "Courier-Bold");
 110
 111     /*
 112      * Go through and break up each paragraph into lines.
 113      */
 114     indent = 0;
 115     firstline = lastline = NULL;
 116     for (p = sourceform; p; p = p->next) {
 117         p->private_data = NULL;
 118
 119         switch (p->type) {
 120             /*
 121              * These paragraph types are either invisible or don't
 122              * define text in the normal sense. Either way, they
 123              * don't require wrapping.
 124              */
 125           case para_IM:
 126           case para_BR:
 127           case para_Biblio:
 128           case para_NotParaType:
 129           case para_Config:
 130           case para_VersionID:
 131           case para_NoCite:
 132             break;
 133
 134             /*
 135              * These paragraph types don't require wrapping, but
 136              * they do affect the line width to which we wrap the
 137              * rest of the paragraphs, so we need to pay attention.
 138              */
 139           case para_LcontPush:
 140             indent += indent_list; break;
 141           case para_LcontPop:
 142             indent -= indent_list; assert(indent >= 0); break;
 143           case para_QuotePush:
 144             indent += indent_quote; break;
 145           case para_QuotePop:
 146             indent -= indent_quote; assert(indent >= 0); break;
 147
 148             /*
 149              * This paragraph type is special. Process it
 150              * specially.
 151              */
 152           case para_Code:
 153             pdata = mknew(para_data);
 154             code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
 155             p->private_data = pdata;
 156             if (pdata->first != pdata->last) {
 157                 pdata->first->penalty_after += 100000;
 158                 pdata->last->penalty_before += 100000;
 159             }
 160             break;
 161
 162             /*
 163              * This paragraph is also special.
 164              */
 165           case para_Rule:
 166             pdata = mknew(para_data);
 167             rule_paragraph(pdata, indent, rule_thickness);
 168             p->private_data = pdata;
 169             break;
 170
 171             /*
 172              * All of these paragraph types require wrapping in the
 173              * ordinary way. So we must supply a set of fonts, a
 174              * line width and auxiliary information (e.g. bullet
 175              * text) for each one.
 176              */
 177           case para_Chapter:
 178           case para_Appendix:
 179           case para_UnnumberedChapter:
 180           case para_Heading:
 181           case para_Subsect:
 182           case para_Normal:
 183           case para_BiblioCited:
 184           case para_Bullet:
 185           case para_NumberedList:
 186           case para_DescribedThing:
 187           case para_Description:
 188           case para_Copyright:
 189           case para_Title:
 190             pdata = mknew(para_data);
 191
 192             /*
 193              * Choose fonts for this paragraph.
 194              *
 195              * FIXME: All of this ought to be completely
 196              * user-configurable.
 197              */
 198             switch (p->type) {
 199               case para_Title:
 200                 pdata->fonts[FONT_NORMAL] = hr;
 201                 pdata->sizes[FONT_NORMAL] = 24;
 202                 pdata->fonts[FONT_EMPH] = hi;
 203                 pdata->sizes[FONT_EMPH] = 24;
 204                 pdata->fonts[FONT_CODE] = cb;
 205                 pdata->sizes[FONT_CODE] = 24;
 206                 break;
 207
 208               case para_Chapter:
 209               case para_Appendix:
 210               case para_UnnumberedChapter:
 211                 pdata->fonts[FONT_NORMAL] = hr;
 212                 pdata->sizes[FONT_NORMAL] = 20;
 213                 pdata->fonts[FONT_EMPH] = hi;
 214                 pdata->sizes[FONT_EMPH] = 20;
 215                 pdata->fonts[FONT_CODE] = cb;
 216                 pdata->sizes[FONT_CODE] = 20;
 217                 break;
 218
 219               case para_Heading:
 220               case para_Subsect:
 221                 pdata->fonts[FONT_NORMAL] = hr;
 222                 pdata->fonts[FONT_EMPH] = hi;
 223                 pdata->fonts[FONT_CODE] = cb;
 224                 pdata->sizes[FONT_NORMAL] =
 225                     pdata->sizes[FONT_EMPH] =
 226                     pdata->sizes[FONT_CODE] =
 227                     (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
 228                 break;
 229
 230               case para_Normal:
 231               case para_BiblioCited:
 232               case para_Bullet:
 233               case para_NumberedList:
 234               case para_DescribedThing:
 235               case para_Description:
 236               case para_Copyright:
 237                 pdata->fonts[FONT_NORMAL] = tr;
 238                 pdata->sizes[FONT_NORMAL] = 12;
 239                 pdata->fonts[FONT_EMPH] = ti;
 240                 pdata->sizes[FONT_EMPH] = 12;
 241                 pdata->fonts[FONT_CODE] = cr;
 242                 pdata->sizes[FONT_CODE] = 12;
 243                 break;
 244             }
 245
 246             /*
 247              * Also select an indentation level depending on the
 248              * paragraph type (list paragraphs other than
 249              * para_DescribedThing need extra indent).
 250              *
 251              * (FIXME: Perhaps at some point we might even arrange
 252              * for the user to be able to request indented first
 253              * lines in paragraphs.)
 254              */
 255             if (p->type == para_Bullet ||
 256                 p->type == para_NumberedList ||
 257                 p->type == para_Description) {
 258                 extra_indent = firstline_indent = indent_list;
 259             } else {
 260                 extra_indent = firstline_indent = 0;
 261             }
 262
 263             /*
 264              * Find the auxiliary text for this paragraph.
 265              */
 266             aux = aux2 = NULL;
 267             aux_indent = 0;
 268
 269             switch (p->type) {
 270               case para_Chapter:
 271               case para_Appendix:
 272               case para_Heading:
 273               case para_Subsect:
 274                 /*
 275                  * For some heading styles (FIXME: be able to
 276                  * configure which), the auxiliary text contains
 277                  * the chapter number and is arranged to be
 278                  * right-aligned a few points left of the primary
 279                  * margin. For other styles, the auxiliary text is
 280                  * the full chapter _name_ and takes up space
 281                  * within the (wrapped) chapter title, meaning that
 282                  * we must move the first line indent over to make
 283                  * space for it.
 284                  */
 285                 if (p->type == para_Heading || p->type == para_Subsect) {
 286                     int len;
 287
 288                     aux = p->kwtext2;
 289                     len = paper_width_simple(pdata, p->kwtext2);
 290                     aux_indent = -len - sect_num_left_space;
 291                 } else {
 292                     aux = p->kwtext;
 293                     aux2 = mknew(word);
 294                     aux2->next = NULL;
 295                     aux2->alt = NULL;
 296                     aux2->type = word_Normal;
 297                     aux2->text = ustrdup(L": ");
 298                     aux2->breaks = FALSE;
 299                     aux2->aux = 0;
 300                     aux_indent = 0;
 301
 302                     firstline_indent += paper_width_simple(pdata, aux);
 303                     firstline_indent += paper_width_simple(pdata, aux2);
 304                 }
 305                 break;
 306
 307               case para_Bullet:
 308                 /*
 309                  * Auxiliary text consisting of a bullet. (FIXME:
 310                  * configurable bullet.)
 311                  */
 312                 aux = mknew(word);
 313                 aux->next = NULL;
 314                 aux->alt = NULL;
 315                 aux->type = word_Normal;
 316                 aux->text = ustrdup(L"\x2022");
 317                 aux->breaks = FALSE;
 318                 aux->aux = 0;
 319                 aux_indent = indent + indent_list_bullet;
 320                 break;
 321
 322               case para_NumberedList:
 323                 /*
 324                  * Auxiliary text consisting of the number followed
 325                  * by a (FIXME: configurable) full stop.
 326                  */
 327                 aux = p->kwtext;
 328                 aux2 = mknew(word);
 329                 aux2->next = NULL;
 330                 aux2->alt = NULL;
 331                 aux2->type = word_Normal;
 332                 aux2->text = ustrdup(L".");
 333                 aux2->breaks = FALSE;
 334                 aux2->aux = 0;
 335                 aux_indent = indent + indent_list_bullet;
 336                 break;
 337
 338               case para_BiblioCited:
 339                 /*
 340                  * Auxiliary text consisting of the bibliography
 341                  * reference text, and a trailing space.
 342                  */
 343                 aux = p->kwtext;
 344                 aux2 = mknew(word);
 345                 aux2->next = NULL;
 346                 aux2->alt = NULL;
 347                 aux2->type = word_Normal;
 348                 aux2->text = ustrdup(L" ");
 349                 aux2->breaks = FALSE;
 350                 aux2->aux = 0;
 351                 aux_indent = indent;
 352                 firstline_indent += paper_width_simple(pdata, aux);
 353                 firstline_indent += paper_width_simple(pdata, aux2);
 354                 break;
 355             }
 356
 357             wrap_paragraph(pdata, p->words, base_width,
 358                            indent + firstline_indent,
 359                            indent + extra_indent);
 360
 361             p->private_data = pdata;
 362
 363             pdata->first->aux_text = aux;
 364             pdata->first->aux_text_2 = aux2;
 365             pdata->first->aux_left_indent = aux_indent;
 366
 367             /*
 368              * Line breaking penalties.
 369              */
 370             switch (p->type) {
 371               case para_Chapter:
 372               case para_Appendix:
 373               case para_Heading:
 374               case para_Subsect:
 375               case para_UnnumberedChapter:
 376                 /*
 377                  * Fixed and large penalty for breaking straight
 378                  * after a heading; corresponding bonus for
 379                  * breaking straight before.
 380                  */
 381                 pdata->first->penalty_before = -500000;
 382                 pdata->last->penalty_after = 500000;
 383                 for (ldata = pdata->first; ldata; ldata = ldata->next)
 384                     ldata->penalty_after = 500000;
 385                 break;
 386
 387               case para_DescribedThing:
 388                 /*
 389                  * This is treated a bit like a small heading:
 390                  * there's a penalty for breaking after it (i.e.
 391                  * between it and its description), and a bonus for
 392                  * breaking before it (actually _between_ list
 393                  * items).
 394                  */
 395                 pdata->first->penalty_before = -200000;
 396                 pdata->last->penalty_after = 200000;
 397                 break;
 398
 399               default:
 400                 /*
 401                  * Most paragraph types: widow/orphan control by
 402                  * discouraging breaking one line from the end of
 403                  * any paragraph.
 404                  */
 405                 if (pdata->first != pdata->last) {
 406                     pdata->first->penalty_after = 100000;
 407                     pdata->last->penalty_before = 100000;
 408                 }
 409                 break;
 410             }
 411
 412             break;
 413         }
 414
 415         if (p->private_data) {
 416             pdata = (para_data *)p->private_data;
 417
 418             /*
 419              * Set the line spacing for each line in this paragraph.
 420              */
 421             for (ldata = pdata->first; ldata; ldata = ldata->next) {
 422                 if (ldata == pdata->first)
 423                     ldata->space_before = base_para_spacing / 2;
 424                 else
 425                     ldata->space_before = base_leading / 2;
 426                 if (ldata == pdata->last)
 427                     ldata->space_after = base_para_spacing / 2;
 428                 else
 429                     ldata->space_after = base_leading / 2;
 430                 ldata->page_break = FALSE;
 431             }
 432
 433             /*
 434              * Some kinds of section heading do require a page
 435              * break before them.
 436              */
 437             if (p->type == para_Title ||
 438                 p->type == para_Chapter ||
 439                 p->type == para_Appendix ||
 440                 p->type == para_UnnumberedChapter) {
 441                 pdata->first->page_break = TRUE;
 442                 pdata->first->space_before = chapter_top_space;
 443                 pdata->last->space_after +=
 444                     chapter_underline_depth + chapter_underline_thickness;
 445             }
 446
 447             /*
 448              * Link all line structures together into a big list.
 449              */
 450             if (pdata->first) {
 451                 if (lastline) {
 452                     lastline->next = pdata->first;
 453                     pdata->first->prev = lastline;
 454                 } else {
 455                     firstline = pdata->first;
 456                     pdata->first->prev = NULL;
 457                 }
 458                 lastline = pdata->last;
 459             }
 460         }
 461     }
 462
 463     /*
 464      * Now we have an enormous linked list of every line of text in
 465      * the document. Break it up into pages.
 466      */
 467     pages = page_breaks(firstline, lastline, page_height);
 468
 469     /*
 470      * Now we're ready to actually lay out the pages. We do this by
 471      * looping over _paragraphs_, since we may need to track cross-
 472      * references between lines and even across pages.
 473      */
 474     for (p = sourceform; p; p = p->next) {
 475         pdata = (para_data *)p->private_data;
 476
 477         if (pdata) {
 478             xref_dest dest;
 479             dest.type = NONE;
 480             for (ldata = pdata->first; ldata; ldata = ldata->next) {
 481                 render_line(ldata, left_margin, paper_height - top_margin,
 482                             &dest, keywords);
 483                 if (ldata == pdata->last)
 484                     break;
 485             }
 486
 487             /*
 488              * Some section headings (FIXME: should be configurable
 489              * which) want to be underlined.
 490              */
 491             if (p->type == para_Chapter || p->type == para_Appendix ||
 492                 p->type == para_UnnumberedChapter || p->type == para_Title) {
 493                 add_rect_to_page(pdata->last->page,
 494                                  left_margin,
 495                                  (paper_height - top_margin -
 496                                   pdata->last->ypos - chapter_underline_depth),
 497                                  base_width,
 498                                  chapter_underline_thickness);
 499             }
 500
 501             /*
 502              * Rule paragraphs need to contain an actual rule!
 503              */
 504             if (p->type == para_Rule) {
 505                 add_rect_to_page(pdata->first->page,
 506                                  left_margin + pdata->first->xpos,
 507                                  (paper_height - top_margin -
 508                                   pdata->last->ypos -
 509                                   pdata->last->line_height),
 510                                  base_width - pdata->first->xpos,
 511                                  pdata->last->line_height);
 512             }
 513         }
 514     }
 515
 516     doc = mknew(document);
 517     doc->fonts = fontlist;
 518     doc->pages = pages;
 519     doc->paper_width = paper_width;
 520     doc->paper_height = paper_height;
 521     return doc;
 522 }
 523
 524 static font_encoding *new_font_encoding(font_data *font)
 525 {
 526     font_encoding *fe;
 527     int i;
 528
 529     fe = mknew(font_encoding);
 530     fe->next = NULL;
 531
 532     if (font->list->tail)
 533         font->list->tail->next = fe;
 534     else
 535         font->list->head = fe;
 536     font->list->tail = fe;
 537
 538     fe->font = font;
 539     fe->free_pos = 0x21;
 540
 541     for (i = 0; i < 256; i++) {
 542         fe->vector[i] = NULL;
 543         fe->indices[i] = -1;
 544         fe->to_unicode[i] = 0xFFFF;
 545     }
 546
 547     return fe;
 548 }
 549
 550 static font_data *make_std_font(font_list *fontlist, char const *name)
 551 {
 552     const int *widths;
 553     int nglyphs;
 554     font_data *f;
 555     font_encoding *fe;
 556     int i;
 557
 558     widths = ps_std_font_widths(name);
 559     if (!widths)
 560         return NULL;
 561
 562     for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
 563
 564     f = mknew(font_data);
 565
 566     f->list = fontlist;
 567     f->name = name;
 568     f->nglyphs = nglyphs;
 569     f->glyphs = ps_std_glyphs;
 570     f->widths = widths;
 571     f->subfont_map = mknewa(subfont_map_entry, nglyphs);
 572
 573     /*
 574      * Our first subfont will contain all of US-ASCII. This isn't
 575      * really necessary - we could just create custom subfonts
 576      * precisely as the whim of render_string dictated - but
 577      * instinct suggests that it might be nice to have the text in
 578      * the output files look _marginally_ recognisable.
 579      */
 580     fe = new_font_encoding(f);
 581     fe->free_pos = 0xA1;               /* only the top half is free */
 582     f->latest_subfont = fe;
 583
 584     for (i = 0; i < (int)lenof(f->bmp); i++)
 585         f->bmp[i] = 0xFFFF;
 586
 587     for (i = 0; i < nglyphs; i++) {
 588         wchar_t ucs;
 589         ucs = ps_glyph_to_unicode(f->glyphs[i]);
 590         assert(ucs != 0xFFFF);
 591         f->bmp[ucs] = i;
 592         if (ucs >= 0x20 && ucs <= 0x7E) {
 593             fe->vector[ucs] = f->glyphs[i];
 594             fe->indices[ucs] = i;
 595             fe->to_unicode[ucs] = ucs;
 596             f->subfont_map[i].subfont = fe;
 597             f->subfont_map[i].position = ucs;
 598         } else {
 599             /*
 600              * This character is not yet assigned to a subfont.
 601              */
 602             f->subfont_map[i].subfont = NULL;
 603             f->subfont_map[i].position = 0;
 604         }
 605     }
 606
 607     return f;
 608 }
 609
 610 static int string_width(font_data *font, wchar_t const *string, int *errs)
 611 {
 612     int width = 0;
 613
 614     if (errs)
 615         *errs = 0;
 616
 617     for (; *string; string++) {
 618         int index;
 619
 620         index = font->bmp[(unsigned short)*string];
 621         if (index == 0xFFFF) {
 622             if (errs)
 623                 *errs = 1;
 624         } else {
 625             width += font->widths[index];
 626         }
 627     }
 628
 629     return width;
 630 }
 631
 632 static int paper_width_internal(void *vctx, word *word, int *nspaces);
 633
 634 struct paper_width_ctx {
 635     int minspacewidth;
 636     para_data *pdata;
 637 };
 638
 639 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
 640     int w = 0;
 641     while (text && text != end) {
 642         w += paper_width_internal(vctx, text, nspaces);
 643         text = text->next;
 644     }
 645     return w;
 646 }
 647
 648 static int paper_width_internal(void *vctx, word *word, int *nspaces)
 649 {
 650     struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
 651     int style, type, findex, width, errs;
 652     wchar_t *str;
 653
 654     switch (word->type) {
 655       case word_HyperLink:
 656       case word_HyperEnd:
 657       case word_UpperXref:
 658       case word_LowerXref:
 659       case word_XrefEnd:
 660       case word_IndexRef:
 661         return 0;
 662     }
 663
 664     style = towordstyle(word->type);
 665     type = removeattr(word->type);
 666
 667     findex = (style == word_Normal ? FONT_NORMAL :
 668               style == word_Emph ? FONT_EMPH :
 669               FONT_CODE);
 670
 671     if (type == word_Normal) {
 672         str = word->text;
 673     } else if (type == word_WhiteSpace) {
 674         if (findex != FONT_CODE) {
 675             if (nspaces)
 676                 (*nspaces)++;
 677             return ctx->minspacewidth;
 678         } else
 679             str = L" ";
 680     } else /* if (type == word_Quote) */ {
 681         if (word->aux == quote_Open)
 682             str = L"\x2018";           /* FIXME: configurability! */
 683         else
 684             str = L"\x2019";           /* FIXME: configurability! */
 685     }
 686
 687     width = string_width(ctx->pdata->fonts[findex], str, &errs);
 688
 689     if (errs && word->alt)
 690         return paper_width_list(vctx, word->alt, NULL, nspaces);
 691     else
 692         return ctx->pdata->sizes[findex] * width;
 693 }
 694
 695 static int paper_width(void *vctx, word *word)
 696 {
 697     return paper_width_internal(vctx, word, NULL);
 698 }
 699
 700 static int paper_width_simple(para_data *pdata, word *text)
 701 {
 702     struct paper_width_ctx ctx;
 703
 704     ctx.pdata = pdata;
 705     ctx.minspacewidth =
 706         (pdata->sizes[FONT_NORMAL] *
 707          string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
 708
 709     return paper_width_list(&ctx, text, NULL, NULL);
 710 }
 711
 712 static void wrap_paragraph(para_data *pdata, word *words,
 713                            int w, int i1, int i2)
 714 {
 715     wrappedline *wrapping, *p;
 716     int spacewidth;
 717     struct paper_width_ctx ctx;
 718     int line_height;
 719
 720     /*
 721      * We're going to need to store the line height in every line
 722      * structure we generate.
 723      */
 724     {
 725         int i;
 726         line_height = 0;
 727         for (i = 0; i < NFONTS; i++)
 728             if (line_height < pdata->sizes[i])
 729                 line_height = pdata->sizes[i];
 730         line_height *= 4096;
 731     }
 732
 733     spacewidth = (pdata->sizes[FONT_NORMAL] *
 734                   string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
 735     if (spacewidth == 0) {
 736         /*
 737          * A font without a space?! Disturbing. I hope this never
 738          * comes up, but I'll make a random guess anyway and set my
 739          * space width to half the point size.
 740          */
 741         spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
 742     }
 743
 744     /*
 745      * I'm going to set the _minimum_ space width to 3/5 of the
 746      * standard one, and use the standard one as the optimum.
 747      */
 748     ctx.minspacewidth = spacewidth * 3 / 5;
 749     ctx.pdata = pdata;
 750
 751     wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
 752
 753     /*
 754      * Having done the wrapping, we now concoct a set of line_data
 755      * structures.
 756      */
 757     pdata->first = pdata->last = NULL;
 758
 759     for (p = wrapping; p; p = p->next) {
 760         line_data *ldata;
 761         word *wd;
 762         int len, wid, spaces;
 763
 764         ldata = mknew(line_data);
 765
 766         ldata->pdata = pdata;
 767         ldata->first = p->begin;
 768         ldata->end = p->end;
 769         ldata->line_height = line_height;
 770
 771         ldata->xpos = (p == wrapping ? i1 : i2);
 772
 773         if (pdata->last) {
 774             pdata->last->next = ldata;
 775             ldata->prev = pdata->last;
 776         } else {
 777             pdata->first = ldata;
 778             ldata->prev = NULL;
 779         }
 780         ldata->next = NULL;
 781         pdata->last = ldata;
 782
 783         spaces = 0;
 784         len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
 785         wid = (p == wrapping ? w - i1 : w - i2);
 786         wd = ldata->first;
 787
 788         ldata->hshortfall = wid - len;
 789         ldata->nspaces = spaces;
 790         /*
 791          * This tells us how much the space width needs to
 792          * change from _min_spacewidth. But we want to store
 793          * its difference from the _natural_ space width, to
 794          * make the text rendering easier.
 795          */
 796         ldata->hshortfall += ctx.minspacewidth * spaces;
 797         ldata->hshortfall -= spacewidth * spaces;
 798         /*
 799          * Special case: on the last line of a paragraph, we
 800          * never stretch spaces.
 801          */
 802         if (ldata->hshortfall > 0 && !p->next)
 803             ldata->hshortfall = 0;
 804
 805         ldata->aux_text = NULL;
 806         ldata->aux_text_2 = NULL;
 807         ldata->aux_left_indent = 0;
 808         ldata->penalty_before = ldata->penalty_after = 0;
 809     }
 810
 811 }
 812
 813 static page_data *page_breaks(line_data *first, line_data *last,
 814                               int page_height)
 815 {
 816     line_data *l, *m;
 817     page_data *ph, *pt;
 818
 819     /*
 820      * Page breaking is done by a close analogue of the optimal
 821      * paragraph wrapping algorithm used by wrap_para(). We work
 822      * backwards from the end of the document line by line; for
 823      * each line, we contemplate every possible number of lines we
 824      * could put on a page starting with that line, determine a
 825      * cost function for each one, add it to the pre-computed cost
 826      * function for optimally page-breaking everything after that
 827      * page, and pick the best option.
 828      *
 829      * Since my line_data structures are only used for this
 830      * purpose, I might as well just store the algorithm data
 831      * directly in them.
 832      */
 833
 834     for (l = last; l; l = l->prev) {
 835         int minheight, text = 0, space = 0;
 836         int cost;
 837
 838         l->bestcost = -1;
 839         for (m = l; m; m = m->next) {
 840             if (m != l && m->page_break)
 841                 break;                 /* we've gone as far as we can */
 842
 843             if (m != l)
 844                 space += m->prev->space_after;
 845             if (m != l || m->page_break)
 846                 space += m->space_before;
 847             text += m->line_height;
 848             minheight = text + space;
 849
 850             if (m != l && minheight > page_height)
 851                 break;
 852
 853             /*
 854              * Compute the cost of this arrangement, as the square
 855              * of the amount of wasted space on the page.
 856              * Exception: if this is the last page before a
 857              * mandatory break or the document end, we don't
 858              * penalise a large blank area.
 859              */
 860             if (m->next && !m->next->page_break)
 861             {
 862                 int x = page_height - minheight;
 863                 int xf;
 864
 865                 xf = x & 0xFF;
 866                 x >>= 8;
 867
 868                 cost = x*x;
 869                 cost += (x * xf) >> 8;
 870             } else
 871                 cost = 0;
 872
 873             if (m->next && !m->next->page_break) {
 874                 cost += m->penalty_after;
 875                 cost += m->next->penalty_before;
 876             }
 877
 878             if (m->next && !m->next->page_break)
 879                 cost += m->next->bestcost;
 880             if (l->bestcost == -1 || l->bestcost > cost) {
 881                 /*
 882                  * This is the best option yet for this starting
 883                  * point.
 884                  */
 885                 l->bestcost = cost;
 886                 if (m->next && !m->next->page_break)
 887                     l->vshortfall = page_height - minheight;
 888                 else
 889                     l->vshortfall = 0;
 890                 l->text = text;
 891                 l->space = space;
 892                 l->page_last = m;
 893             }
 894         }
 895     }
 896
 897     /*
 898      * Now go through the line list forwards and assemble the
 899      * actual pages.
 900      */
 901     ph = pt = NULL;
 902
 903     l = first;
 904     while (l) {
 905         page_data *page;
 906         int text, space;
 907
 908         page = mknew(page_data);
 909         page->next = NULL;
 910         page->prev = pt;
 911         if (pt)
 912             pt->next = page;
 913         else
 914             ph = page;
 915         pt = page;
 916
 917         page->first_line = l;
 918         page->last_line = l->page_last;
 919
 920         page->first_text = page->last_text = NULL;
 921         page->first_xref = page->last_xref = NULL;
 922         page->first_rect = page->last_rect = NULL;
 923
 924         /*
 925          * Now assign a y-coordinate to each line on the page.
 926          */
 927         text = space = 0;
 928         for (l = page->first_line; l; l = l->next) {
 929             if (l != page->first_line)
 930                 space += l->prev->space_after;
 931             if (l != page->first_line || l->page_break)
 932                 space += l->space_before;
 933             text += l->line_height;
 934
 935             l->page = page;
 936             l->ypos = text + space +
 937                 space * (float)page->first_line->vshortfall /
 938                 page->first_line->space;
 939
 940             if (l == page->last_line)
 941                 break;
 942         }
 943
 944         l = page->last_line->next;
 945     }
 946
 947     return ph;
 948 }
 949
 950 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
 951 {
 952     rect *r = mknew(rect);
 953
 954     r->next = NULL;
 955     if (page->last_rect)
 956         page->last_rect->next = r;
 957     else
 958         page->first_rect = r;
 959     page->last_rect = r;
 960
 961     r->x = x;
 962     r->y = y;
 963     r->w = w;
 964     r->h = h;
 965 }
 966
 967 static void add_string_to_page(page_data *page, int x, int y,
 968                                font_encoding *fe, int size, char *text)
 969 {
 970     text_fragment *frag;
 971
 972     frag = mknew(text_fragment);
 973     frag->next = NULL;
 974
 975     if (page->last_text)
 976         page->last_text->next = frag;
 977     else
 978         page->first_text = frag;
 979     page->last_text = frag;
 980
 981     frag->x = x;
 982     frag->y = y;
 983     frag->fe = fe;
 984     frag->fontsize = size;
 985     frag->text = dupstr(text);
 986 }
 987
 988 /*
 989  * Returns the updated x coordinate.
 990  */
 991 static int render_string(page_data *page, font_data *font, int fontsize,
 992                          int x, int y, wchar_t *str)
 993 {
 994     char *text;
 995     int textpos, textwid, glyph;
 996     font_encoding *subfont = NULL, *sf;
 997
 998     text = mknewa(char, 1 + ustrlen(str));
 999     textpos = textwid = 0;
1000
1001     while (*str) {
1002         glyph = font->bmp[*str];
1003
1004         if (glyph == 0xFFFF)
1005             continue;                  /* nothing more we can do here */
1006
1007         /*
1008          * Find which subfont this character is going in.
1009          */
1010         sf = font->subfont_map[glyph].subfont;
1011
1012         if (!sf) {
1013             int c;
1014
1015             /*
1016              * This character is not yet in a subfont. Assign one.
1017              */
1018             if (font->latest_subfont->free_pos >= 0x100)
1019                 font->latest_subfont = new_font_encoding(font);
1020
1021             c = font->latest_subfont->free_pos++;
1022             if (font->latest_subfont->free_pos == 0x7F)
1023                 font->latest_subfont->free_pos = 0xA1;
1024
1025             font->subfont_map[glyph].subfont = font->latest_subfont;
1026             font->subfont_map[glyph].position = c;
1027             font->latest_subfont->vector[c] = font->glyphs[glyph];
1028             font->latest_subfont->indices[c] = glyph;
1029             font->latest_subfont->to_unicode[c] = *str;
1030
1031             sf = font->latest_subfont;
1032         }
1033
1034         if (!subfont || sf != subfont) {
1035             if (subfont) {
1036                 text[textpos] = '\0';
1037                 add_string_to_page(page, x, y, subfont, fontsize, text);
1038                 x += textwid;
1039             } else {
1040                 assert(textpos == 0);
1041             }
1042             textpos = 0;
1043             subfont = sf;
1044         }
1045
1046         text[textpos++] = font->subfont_map[glyph].position;
1047         textwid += font->widths[glyph] * fontsize;
1048
1049         str++;
1050     }
1051
1052     if (textpos > 0) {
1053         text[textpos] = '\0';
1054         add_string_to_page(page, x, y, subfont, fontsize, text);
1055         x += textwid;
1056     }
1057
1058     return x;
1059 }
1060
1061 /*
1062  * Returns the updated x coordinate.
1063  */
1064 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
1065                        int x, int y, word *text, word *text_end, xref **xr,
1066                        int shortfall, int nspaces, int *nspace,
1067                        keywordlist *keywords)
1068 {
1069     while (text && text != text_end) {
1070         int style, type, findex, errs;
1071         wchar_t *str;
1072         xref_dest dest;
1073
1074         switch (text->type) {
1075             /*
1076              * Start a cross-reference.
1077              */
1078           case word_HyperLink:
1079           case word_UpperXref:
1080           case word_LowerXref:
1081
1082             if (text->type == word_HyperLink) {
1083                 dest.type = URL;
1084                 dest.url = utoa_dup(text->text);
1085                 dest.page = NULL;
1086             } else {
1087                 keyword *kwl = kw_lookup(keywords, text->text);
1088                 para_data *pdata;
1089
1090                 if (kwl) {
1091                     assert(kwl->para->private_data);
1092                     pdata = (para_data *) kwl->para->private_data;
1093                     dest.type = PAGE;
1094                     dest.page = pdata->first->page;
1095                     dest.url = NULL;
1096                 } else {
1097                     /*
1098                      * Shouldn't happen, but *shrug*
1099                      */
1100                     dest.type = NONE;
1101                     dest.page = NULL;
1102                     dest.url = NULL;
1103                 }
1104             }
1105             if (dest.type != NONE) {
1106                 *xr = mknew(xref);
1107                 (*xr)->dest = dest;    /* structure copy */
1108                 if (page->last_xref)
1109                     page->last_xref->next = *xr;
1110                 else
1111                     page->first_xref = *xr;
1112                 page->last_xref = *xr;
1113                 (*xr)->next = NULL;
1114
1115                 /*
1116                  * FIXME: Ideally we should have, and use, some
1117                  * vertical font metric information here so that
1118                  * our cross-ref rectangle can take account of
1119                  * descenders and the font's cap height. This will
1120                  * do for the moment, but it isn't ideal.
1121                  */
1122                 (*xr)->lx = (*xr)->rx = x;
1123                 (*xr)->by = y;
1124                 (*xr)->ty = y + ldata->line_height;
1125             }
1126             goto nextword;
1127
1128             /*
1129              * Finish extending a cross-reference box.
1130              */
1131           case word_HyperEnd:
1132           case word_XrefEnd:
1133             *xr = NULL;
1134             goto nextword;
1135
1136           case word_IndexRef:
1137             goto nextword;
1138             /*
1139              * FIXME: we should do something with this.
1140              */
1141         }
1142
1143         style = towordstyle(text->type);
1144         type = removeattr(text->type);
1145
1146         findex = (style == word_Normal ? FONT_NORMAL :
1147                   style == word_Emph ? FONT_EMPH :
1148                   FONT_CODE);
1149
1150         if (type == word_Normal) {
1151             str = text->text;
1152         } else if (type == word_WhiteSpace) {
1153             x += pdata->sizes[findex] *
1154                 string_width(pdata->fonts[findex], L" ", NULL);
1155             if (nspaces && findex != FONT_CODE) {
1156                 x += (*nspace+1) * shortfall / nspaces;
1157                 x -= *nspace * shortfall / nspaces;
1158                 (*nspace)++;
1159             }
1160             goto nextword;
1161         } else /* if (type == word_Quote) */ {
1162             if (text->aux == quote_Open)
1163                 str = L"\x2018";               /* FIXME: configurability! */
1164             else
1165                 str = L"\x2019";               /* FIXME: configurability! */
1166         }
1167
1168         (void) string_width(pdata->fonts[findex], str, &errs);
1169
1170         if (errs && text->alt)
1171             x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1172                             xr, shortfall, nspaces, nspace, keywords);
1173         else
1174             x = render_string(page, pdata->fonts[findex],
1175                               pdata->sizes[findex], x, y, str);
1176
1177         if (*xr)
1178             (*xr)->rx = x;
1179
1180         nextword:
1181         text = text->next;
1182     }
1183
1184     return x;
1185 }
1186
1187 static void render_line(line_data *ldata, int left_x, int top_y,
1188                         xref_dest *dest, keywordlist *keywords)
1189 {
1190     int nspace;
1191     xref *xr;
1192
1193     if (ldata->aux_text) {
1194         int x;
1195         xr = NULL;
1196         nspace = 0;
1197         x = render_text(ldata->page, ldata->pdata, ldata,
1198                         left_x + ldata->aux_left_indent,
1199                         top_y - ldata->ypos,
1200                         ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1201         if (ldata->aux_text_2)
1202             render_text(ldata->page, ldata->pdata, ldata,
1203                         x, top_y - ldata->ypos,
1204                         ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1205     }
1206     nspace = 0;
1207
1208     if (ldata->first) {
1209         /*
1210          * There might be a cross-reference carried over from a
1211          * previous line.
1212          */
1213         if (dest->type != NONE) {
1214             xr = mknew(xref);
1215             xr->next = NULL;
1216             xr->dest = *dest;    /* structure copy */
1217             if (ldata->page->last_xref)
1218                 ldata->page->last_xref->next = xr;
1219             else
1220                 ldata->page->first_xref = xr;
1221             ldata->page->last_xref = xr;
1222             xr->lx = xr->rx = left_x + ldata->xpos;
1223             xr->by = top_y - ldata->ypos;
1224             xr->ty = top_y - ldata->ypos + ldata->line_height;
1225         } else
1226             xr = NULL;
1227
1228         render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1229                     top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1230                     ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1231
1232         if (xr) {
1233             /*
1234              * There's a cross-reference continued on to the next line.
1235              */
1236             *dest = xr->dest;
1237         } else
1238             dest->type = NONE;
1239     }
1240 }
1241
1242 static void code_paragraph(para_data *pdata,
1243                            font_data *fn, font_data *fi, font_data *fb,
1244                            int font_size, int indent, word *words)
1245 {
1246     /*
1247      * For code paragraphs, I'm going to hack grievously and
1248      * pretend the three normal fonts are the three code paragraph
1249      * fonts.
1250      */
1251     pdata->fonts[FONT_NORMAL] = fb;
1252     pdata->fonts[FONT_EMPH] = fi;
1253     pdata->fonts[FONT_CODE] = fn;
1254     pdata->sizes[FONT_NORMAL] =
1255         pdata->sizes[FONT_EMPH] =
1256         pdata->sizes[FONT_CODE] = font_size;
1257
1258     pdata->first = pdata->last = NULL;
1259
1260     for (; words; words = words->next) {
1261         wchar_t *t, *e, *start;
1262         word *lhead = NULL, *ltail = NULL, *w;
1263         line_data *ldata;
1264         int prev = -1, curr;
1265
1266         t = words->text;
1267         if (words->next && words->next->type == word_Emph) {
1268             e = words->next->text;
1269             words = words->next;
1270         } else
1271             e = NULL;
1272
1273         start = t;
1274
1275         while (*start) {
1276             while (*t) {
1277                 if (!e || !*e)
1278                     curr = 0;
1279                 else if (*e == L'i')
1280                     curr = 1;
1281                 else if (*e == L'b')
1282                     curr = 2;
1283                 else
1284                     curr = 0;
1285
1286                 if (prev < 0)
1287                     prev = curr;
1288
1289                 if (curr != prev)
1290                     break;
1291
1292                 t++;
1293                 if (e && *e)
1294                     e++;
1295             }
1296
1297             /*
1298              * We've isolated a maximal subsequence of the line
1299              * which has the same emphasis. Form it into a word
1300              * structure.
1301              */
1302             w = mknew(word);
1303             w->next = NULL;
1304             w->alt = NULL;
1305             w->type = (prev == 0 ? word_WeakCode :
1306                       prev == 1 ? word_Emph : word_Normal);
1307             w->text = mknewa(wchar_t, t-start+1);
1308             memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1309             w->text[t-start] = '\0';
1310             w->breaks = FALSE;
1311
1312             if (ltail)
1313                 ltail->next = w;
1314             else
1315                 lhead = w;
1316             ltail = w;
1317
1318             start = t;
1319             prev = -1;
1320         }
1321
1322         ldata = mknew(line_data);
1323
1324         ldata->pdata = pdata;
1325         ldata->first = lhead;
1326         ldata->end = NULL;
1327         ldata->line_height = font_size * 4096;
1328
1329         ldata->xpos = indent;
1330
1331         if (pdata->last) {
1332             pdata->last->next = ldata;
1333             ldata->prev = pdata->last;
1334         } else {
1335             pdata->first = ldata;
1336             ldata->prev = NULL;
1337         }
1338         ldata->next = NULL;
1339         pdata->last = ldata;
1340
1341         ldata->hshortfall = 0;
1342         ldata->nspaces = 0;
1343         ldata->aux_text = NULL;
1344         ldata->aux_text_2 = NULL;
1345         ldata->aux_left_indent = 0;
1346         /* General opprobrium for breaking in a code paragraph. */
1347         ldata->penalty_before = ldata->penalty_after = 50000;
1348     }
1349 }
1350
1351 static void rule_paragraph(para_data *pdata, int indent, int height)
1352 {
1353     line_data *ldata;
1354
1355     ldata = mknew(line_data);
1356
1357     ldata->pdata = pdata;
1358     ldata->first = NULL;
1359     ldata->end = NULL;
1360     ldata->line_height = height;
1361
1362     ldata->xpos = indent;
1363
1364     ldata->prev = NULL;
1365     ldata->next = NULL;
1366
1367     ldata->hshortfall = 0;
1368     ldata->nspaces = 0;
1369     ldata->aux_text = NULL;
1370     ldata->aux_text_2 = NULL;
1371     ldata->aux_left_indent = 0;
1372
1373     /*
1374      * Better to break after a rule than before it
1375      */
1376     ldata->penalty_after += 100000;
1377     ldata->penalty_before += -100000;
1378
1379     pdata->first = pdata->last = ldata;
1380 }