mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_paper.c

   1 /*
   2  * Paper printing pre-backend for Halibut.
   3  *
   4  * This module does all the processing common to both PostScript
   5  * and PDF output: selecting fonts, line wrapping and page breaking
   6  * in accordance with font metrics, laying out the contents and
   7  * index pages, generally doing all the page layout. After this,
   8  * bk_ps.c and bk_pdf.c should only need to do linear translations
   9  * into their literal output format.
  10  */
  11
  12 /*
  13  * To be done:
  14  *
  15  *  - set up contents section now we know what sections begin on
  16  *    which pages
  17  *
  18  *  - do PDF outline
  19  *
  20  *  - index
  21  *
  22  * That should bring us to the same level of functionality that
  23  * original-Halibut had, and the same in PDF plus the obvious
  24  * interactive navigation features. After that, in future work:
  25  *
  26  *  - linearised PDF, perhaps?
  27  *
  28  *  - I'm uncertain of whether I need to include a ToUnicode CMap
  29  *    in each of my font definitions in PDF. Currently things (by
  30  *    which I mean cut and paste out of acroread) seem to be
  31  *    working fairly happily without it, but I don't know.
  32  *
  33  *  - configurability
  34  *
  35  *  - title pages
  36  */
  37
  38 #include <assert.h>
  39 #include <stdio.h>
  40
  41 #include "halibut.h"
  42 #include "paper.h"
  43
  44 static font_data *make_std_font(font_list *fontlist, char const *name);
  45 static void wrap_paragraph(para_data *pdata, word *words,
  46                            int w, int i1, int i2);
  47 static page_data *page_breaks(line_data *first, line_data *last,
  48                               int page_height);
  49 static void render_line(line_data *ldata, int left_x, int top_y,
  50                         xref_dest *dest, keywordlist *keywords);
  51 static int paper_width_simple(para_data *pdata, word *text);
  52 static void code_paragraph(para_data *pdata,
  53                            font_data *fn, font_data *fi, font_data *fb,
  54                            int font_size, int indent, word *words);
  55
  56 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
  57                         indexdata *idx) {
  58     paragraph *p;
  59     document *doc;
  60     int indent, extra_indent, firstline_indent, aux_indent;
  61     para_data *pdata;
  62     line_data *ldata, *firstline, *lastline;
  63     font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
  64     page_data *pages;
  65     font_list *fontlist;
  66     word *aux, *aux2;
  67
  68     /*
  69      * FIXME: All these things ought to become configurable.
  70      */
  71     int paper_width = 595 * 4096;
  72     int paper_height = 841 * 4096;
  73     int left_margin = 72 * 4096;
  74     int top_margin = 72 * 4096;
  75     int right_margin = 72 * 4096;
  76     int bottom_margin = 108 * 4096;
  77     int indent_list_bullet = 6 * 4096;
  78     int indent_list = 24 * 4096;
  79     int indent_quote = 18 * 4096;
  80     int base_leading = 4096;
  81     int base_para_spacing = 10 * 4096;
  82     int chapter_top_space = 72 * 4096;
  83     int sect_num_left_space = 12 * 4096;
  84
  85     int base_width = paper_width - left_margin - right_margin;
  86     int page_height = paper_height - top_margin - bottom_margin;
  87
  88     IGNORE(keywords);                  /* FIXME */
  89     IGNORE(idx);                       /* FIXME */
  90
  91     /*
  92      * First, set up some font structures.
  93      */
  94     fontlist = mknew(font_list);
  95     fontlist->head = fontlist->tail = NULL;
  96     tr = make_std_font(fontlist, "Times-Roman");
  97     ti = make_std_font(fontlist, "Times-Italic");
  98     hr = make_std_font(fontlist, "Helvetica-Bold");
  99     hi = make_std_font(fontlist, "Helvetica-BoldOblique");
 100     cr = make_std_font(fontlist, "Courier");
 101     co = make_std_font(fontlist, "Courier-Oblique");
 102     cb = make_std_font(fontlist, "Courier-Bold");
 103
 104     /*
 105      * Go through and break up each paragraph into lines.
 106      */
 107     indent = 0;
 108     firstline = lastline = NULL;
 109     for (p = sourceform; p; p = p->next) {
 110         p->private_data = NULL;
 111
 112         switch (p->type) {
 113             /*
 114              * These paragraph types are either invisible or don't
 115              * define text in the normal sense. Either way, they
 116              * don't require wrapping.
 117              */
 118           case para_IM:
 119           case para_BR:
 120           case para_Rule:
 121           case para_Biblio:
 122           case para_NotParaType:
 123           case para_Config:
 124           case para_VersionID:
 125           case para_NoCite:
 126             break;
 127
 128             /*
 129              * These paragraph types don't require wrapping, but
 130              * they do affect the line width to which we wrap the
 131              * rest of the paragraphs, so we need to pay attention.
 132              */
 133           case para_LcontPush:
 134             indent += indent_list; break;
 135           case para_LcontPop:
 136             indent -= indent_list; assert(indent >= 0); break;
 137           case para_QuotePush:
 138             indent += indent_quote; break;
 139           case para_QuotePop:
 140             indent -= indent_quote; assert(indent >= 0); break;
 141
 142             /*
 143              * This paragraph type is special. Process it
 144              * specially.
 145              */
 146           case para_Code:
 147             pdata = mknew(para_data);
 148             code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
 149             p->private_data = pdata;
 150             break;
 151
 152             /*
 153              * All of these paragraph types require wrapping in the
 154              * ordinary way. So we must supply a set of fonts, a
 155              * line width and auxiliary information (e.g. bullet
 156              * text) for each one.
 157              */
 158           case para_Chapter:
 159           case para_Appendix:
 160           case para_UnnumberedChapter:
 161           case para_Heading:
 162           case para_Subsect:
 163           case para_Normal:
 164           case para_BiblioCited:
 165           case para_Bullet:
 166           case para_NumberedList:
 167           case para_DescribedThing:
 168           case para_Description:
 169           case para_Copyright:
 170           case para_Title:
 171             pdata = mknew(para_data);
 172
 173             /*
 174              * Choose fonts for this paragraph.
 175              *
 176              * FIXME: All of this ought to be completely
 177              * user-configurable.
 178              */
 179             switch (p->type) {
 180               case para_Title:
 181                 pdata->fonts[FONT_NORMAL] = hr;
 182                 pdata->sizes[FONT_NORMAL] = 24;
 183                 pdata->fonts[FONT_EMPH] = hi;
 184                 pdata->sizes[FONT_EMPH] = 24;
 185                 pdata->fonts[FONT_CODE] = cb;
 186                 pdata->sizes[FONT_CODE] = 24;
 187                 break;
 188
 189               case para_Chapter:
 190               case para_Appendix:
 191               case para_UnnumberedChapter:
 192                 pdata->fonts[FONT_NORMAL] = hr;
 193                 pdata->sizes[FONT_NORMAL] = 20;
 194                 pdata->fonts[FONT_EMPH] = hi;
 195                 pdata->sizes[FONT_EMPH] = 20;
 196                 pdata->fonts[FONT_CODE] = cb;
 197                 pdata->sizes[FONT_CODE] = 20;
 198                 break;
 199
 200               case para_Heading:
 201               case para_Subsect:
 202                 pdata->fonts[FONT_NORMAL] = hr;
 203                 pdata->fonts[FONT_EMPH] = hi;
 204                 pdata->fonts[FONT_CODE] = cb;
 205                 pdata->sizes[FONT_NORMAL] =
 206                     pdata->sizes[FONT_EMPH] =
 207                     pdata->sizes[FONT_CODE] =
 208                     (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
 209                 break;
 210
 211               case para_Normal:
 212               case para_BiblioCited:
 213               case para_Bullet:
 214               case para_NumberedList:
 215               case para_DescribedThing:
 216               case para_Description:
 217               case para_Copyright:
 218                 pdata->fonts[FONT_NORMAL] = tr;
 219                 pdata->sizes[FONT_NORMAL] = 12;
 220                 pdata->fonts[FONT_EMPH] = ti;
 221                 pdata->sizes[FONT_EMPH] = 12;
 222                 pdata->fonts[FONT_CODE] = cr;
 223                 pdata->sizes[FONT_CODE] = 12;
 224                 break;
 225             }
 226
 227             /*
 228              * Also select an indentation level depending on the
 229              * paragraph type (list paragraphs other than
 230              * para_DescribedThing need extra indent).
 231              *
 232              * (FIXME: Perhaps at some point we might even arrange
 233              * for the user to be able to request indented first
 234              * lines in paragraphs.)
 235              */
 236             if (p->type == para_Bullet ||
 237                 p->type == para_NumberedList ||
 238                 p->type == para_Description) {
 239                 extra_indent = firstline_indent = indent_list;
 240             } else {
 241                 extra_indent = firstline_indent = 0;
 242             }
 243
 244             /*
 245              * Find the auxiliary text for this paragraph.
 246              */
 247             aux = aux2 = NULL;
 248             aux_indent = 0;
 249
 250             switch (p->type) {
 251               case para_Chapter:
 252               case para_Appendix:
 253               case para_Heading:
 254               case para_Subsect:
 255                 /*
 256                  * For some heading styles (FIXME: be able to
 257                  * configure which), the auxiliary text contains
 258                  * the chapter number and is arranged to be
 259                  * right-aligned a few points left of the primary
 260                  * margin. For other styles, the auxiliary text is
 261                  * the full chapter _name_ and takes up space
 262                  * within the (wrapped) chapter title, meaning that
 263                  * we must move the first line indent over to make
 264                  * space for it.
 265                  */
 266                 if (p->type == para_Heading || p->type == para_Subsect) {
 267                     int len;
 268
 269                     aux = p->kwtext2;
 270                     len = paper_width_simple(pdata, p->kwtext2);
 271                     aux_indent = -len - sect_num_left_space;
 272                 } else {
 273                     aux = p->kwtext;
 274                     aux2 = mknew(word);
 275                     aux2->next = NULL;
 276                     aux2->alt = NULL;
 277                     aux2->type = word_Normal;
 278                     aux2->text = ustrdup(L": ");
 279                     aux2->breaks = FALSE;
 280                     aux2->aux = 0;
 281                     aux_indent = 0;
 282
 283                     firstline_indent += paper_width_simple(pdata, aux);
 284                     firstline_indent += paper_width_simple(pdata, aux2);
 285                 }
 286                 break;
 287
 288               case para_Bullet:
 289                 /*
 290                  * Auxiliary text consisting of a bullet. (FIXME:
 291                  * configurable bullet.)
 292                  */
 293                 aux = mknew(word);
 294                 aux->next = NULL;
 295                 aux->alt = NULL;
 296                 aux->type = word_Normal;
 297                 aux->text = ustrdup(L"\x2022");
 298                 aux->breaks = FALSE;
 299                 aux->aux = 0;
 300                 aux_indent = indent + indent_list_bullet;
 301                 break;
 302
 303               case para_NumberedList:
 304                 /*
 305                  * Auxiliary text consisting of the number followed
 306                  * by a (FIXME: configurable) full stop.
 307                  */
 308                 aux = p->kwtext;
 309                 aux2 = mknew(word);
 310                 aux2->next = NULL;
 311                 aux2->alt = NULL;
 312                 aux2->type = word_Normal;
 313                 aux2->text = ustrdup(L".");
 314                 aux2->breaks = FALSE;
 315                 aux2->aux = 0;
 316                 aux_indent = indent + indent_list_bullet;
 317                 break;
 318
 319               case para_BiblioCited:
 320                 /*
 321                  * Auxiliary text consisting of the bibliography
 322                  * reference text, and a trailing space.
 323                  */
 324                 aux = p->kwtext;
 325                 aux2 = mknew(word);
 326                 aux2->next = NULL;
 327                 aux2->alt = NULL;
 328                 aux2->type = word_Normal;
 329                 aux2->text = ustrdup(L" ");
 330                 aux2->breaks = FALSE;
 331                 aux2->aux = 0;
 332                 aux_indent = indent;
 333                 firstline_indent += paper_width_simple(pdata, aux);
 334                 firstline_indent += paper_width_simple(pdata, aux2);
 335                 break;
 336             }
 337
 338             wrap_paragraph(pdata, p->words, base_width,
 339                            indent + firstline_indent,
 340                            indent + extra_indent);
 341
 342             p->private_data = pdata;
 343
 344             pdata->first->aux_text = aux;
 345             pdata->first->aux_text_2 = aux2;
 346             pdata->first->aux_left_indent = aux_indent;
 347
 348             break;
 349         }
 350
 351         if (p->private_data) {
 352             pdata = (para_data *)p->private_data;
 353
 354             /*
 355              * Set the line spacing for each line in this paragraph.
 356              */
 357             for (ldata = pdata->first; ldata; ldata = ldata->next) {
 358                 if (ldata == pdata->first)
 359                     ldata->space_before = base_para_spacing / 2;
 360                 else
 361                     ldata->space_before = base_leading / 2;
 362                 if (ldata == pdata->last)
 363                     ldata->space_after = base_para_spacing / 2;
 364                 else
 365                     ldata->space_after = base_leading / 2;
 366                 ldata->page_break = FALSE;
 367             }
 368
 369             /*
 370              * Some kinds of section heading do require a page
 371              * break before them.
 372              */
 373             if (p->type == para_Title ||
 374                 p->type == para_Chapter ||
 375                 p->type == para_Appendix ||
 376                 p->type == para_UnnumberedChapter) {
 377                 pdata->first->page_break = TRUE;
 378                 pdata->first->space_before = chapter_top_space;
 379             }
 380
 381             /*
 382              * Link all line structures together into a big list.
 383              */
 384             if (pdata->first) {
 385                 if (lastline) {
 386                     lastline->next = pdata->first;
 387                     pdata->first->prev = lastline;
 388                 } else {
 389                     firstline = pdata->first;
 390                     pdata->first->prev = NULL;
 391                 }
 392                 lastline = pdata->last;
 393             }
 394         }
 395     }
 396
 397     /*
 398      * Now we have an enormous linked list of every line of text in
 399      * the document. Break it up into pages.
 400      */
 401     pages = page_breaks(firstline, lastline, page_height);
 402
 403     /*
 404      * Now we're ready to actually lay out the pages. We do this by
 405      * looping over _paragraphs_, since we may need to track cross-
 406      * references between lines and even across pages.
 407      */
 408     for (p = sourceform; p; p = p->next) {
 409         pdata = (para_data *)p->private_data;
 410
 411         if (pdata) {
 412             xref_dest dest;
 413             dest.type = NONE;
 414             for (ldata = pdata->first; ldata; ldata = ldata->next) {
 415                 render_line(ldata, left_margin, paper_height - top_margin,
 416                             &dest, keywords);
 417                 if (ldata == pdata->last)
 418                     break;
 419             }
 420         }
 421     }
 422
 423     doc = mknew(document);
 424     doc->fonts = fontlist;
 425     doc->pages = pages;
 426     doc->paper_width = paper_width;
 427     doc->paper_height = paper_height;
 428     return doc;
 429 }
 430
 431 static font_encoding *new_font_encoding(font_data *font)
 432 {
 433     font_encoding *fe;
 434     int i;
 435
 436     fe = mknew(font_encoding);
 437     fe->next = NULL;
 438
 439     if (font->list->tail)
 440         font->list->tail->next = fe;
 441     else
 442         font->list->head = fe;
 443     font->list->tail = fe;
 444
 445     fe->font = font;
 446     fe->free_pos = 0x21;
 447
 448     for (i = 0; i < 256; i++) {
 449         fe->vector[i] = NULL;
 450         fe->indices[i] = -1;
 451         fe->to_unicode[i] = 0xFFFF;
 452     }
 453
 454     return fe;
 455 }
 456
 457 static font_data *make_std_font(font_list *fontlist, char const *name)
 458 {
 459     const int *widths;
 460     int nglyphs;
 461     font_data *f;
 462     font_encoding *fe;
 463     int i;
 464
 465     widths = ps_std_font_widths(name);
 466     if (!widths)
 467         return NULL;
 468
 469     for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
 470
 471     f = mknew(font_data);
 472
 473     f->list = fontlist;
 474     f->name = name;
 475     f->nglyphs = nglyphs;
 476     f->glyphs = ps_std_glyphs;
 477     f->widths = widths;
 478     f->subfont_map = mknewa(subfont_map_entry, nglyphs);
 479
 480     /*
 481      * Our first subfont will contain all of US-ASCII. This isn't
 482      * really necessary - we could just create custom subfonts
 483      * precisely as the whim of render_string dictated - but
 484      * instinct suggests that it might be nice to have the text in
 485      * the output files look _marginally_ recognisable.
 486      */
 487     fe = new_font_encoding(f);
 488     fe->free_pos = 0xA1;               /* only the top half is free */
 489     f->latest_subfont = fe;
 490
 491     for (i = 0; i < (int)lenof(f->bmp); i++)
 492         f->bmp[i] = 0xFFFF;
 493
 494     for (i = 0; i < nglyphs; i++) {
 495         wchar_t ucs;
 496         ucs = ps_glyph_to_unicode(f->glyphs[i]);
 497         assert(ucs != 0xFFFF);
 498         f->bmp[ucs] = i;
 499         if (ucs >= 0x20 && ucs <= 0x7E) {
 500             fe->vector[ucs] = f->glyphs[i];
 501             fe->indices[ucs] = i;
 502             fe->to_unicode[ucs] = ucs;
 503             f->subfont_map[i].subfont = fe;
 504             f->subfont_map[i].position = ucs;
 505         } else {
 506             /*
 507              * This character is not yet assigned to a subfont.
 508              */
 509             f->subfont_map[i].subfont = NULL;
 510             f->subfont_map[i].position = 0;
 511         }
 512     }
 513
 514     return f;
 515 }
 516
 517 static int string_width(font_data *font, wchar_t const *string, int *errs)
 518 {
 519     int width = 0;
 520
 521     if (errs)
 522         *errs = 0;
 523
 524     for (; *string; string++) {
 525         int index;
 526
 527         index = font->bmp[(unsigned short)*string];
 528         if (index == 0xFFFF) {
 529             if (errs)
 530                 *errs = 1;
 531         } else {
 532             width += font->widths[index];
 533         }
 534     }
 535
 536     return width;
 537 }
 538
 539 static int paper_width_internal(void *vctx, word *word, int *nspaces);
 540
/*
 * Context handed (as a void *) to the paper_width_* measuring
 * functions.
 */
struct paper_width_ctx {
    /* Width reported for a whitespace word outside the code font. */
    int minspacewidth;
    /* Paragraph whose fonts[] and sizes[] are used for measuring. */
    para_data *pdata;
};
 545
 546 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
 547     int w = 0;
 548     while (text && text != end) {
 549         w += paper_width_internal(vctx, text, nspaces);
 550         text = text->next;
 551     }
 552     return w;
 553 }
 554
 555 static int paper_width_internal(void *vctx, word *word, int *nspaces)
 556 {
 557     struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
 558     int style, type, findex, width, errs;
 559     wchar_t *str;
 560
 561     switch (word->type) {
 562       case word_HyperLink:
 563       case word_HyperEnd:
 564       case word_UpperXref:
 565       case word_LowerXref:
 566       case word_XrefEnd:
 567       case word_IndexRef:
 568         return 0;
 569     }
 570
 571     style = towordstyle(word->type);
 572     type = removeattr(word->type);
 573
 574     findex = (style == word_Normal ? FONT_NORMAL :
 575               style == word_Emph ? FONT_EMPH :
 576               FONT_CODE);
 577
 578     if (type == word_Normal) {
 579         str = word->text;
 580     } else if (type == word_WhiteSpace) {
 581         if (findex != FONT_CODE) {
 582             if (nspaces)
 583                 (*nspaces)++;
 584             return ctx->minspacewidth;
 585         } else
 586             str = L" ";
 587     } else /* if (type == word_Quote) */ {
 588         if (word->aux == quote_Open)
 589             str = L"\x2018";           /* FIXME: configurability! */
 590         else
 591             str = L"\x2019";           /* FIXME: configurability! */
 592     }
 593
 594     width = string_width(ctx->pdata->fonts[findex], str, &errs);
 595
 596     if (errs && word->alt)
 597         return paper_width_list(vctx, word->alt, NULL, nspaces);
 598     else
 599         return ctx->pdata->sizes[findex] * width;
 600 }
 601
 602 static int paper_width(void *vctx, word *word)
 603 {
 604     return paper_width_internal(vctx, word, NULL);
 605 }
 606
 607 static int paper_width_simple(para_data *pdata, word *text)
 608 {
 609     struct paper_width_ctx ctx;
 610
 611     ctx.pdata = pdata;
 612     ctx.minspacewidth =
 613         (pdata->sizes[FONT_NORMAL] *
 614          string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
 615
 616     return paper_width_list(&ctx, text, NULL, NULL);
 617 }
 618
 619 static void wrap_paragraph(para_data *pdata, word *words,
 620                            int w, int i1, int i2)
 621 {
 622     wrappedline *wrapping, *p;
 623     int spacewidth;
 624     struct paper_width_ctx ctx;
 625     int line_height;
 626
 627     /*
 628      * We're going to need to store the line height in every line
 629      * structure we generate.
 630      */
 631     {
 632         int i;
 633         line_height = 0;
 634         for (i = 0; i < NFONTS; i++)
 635             if (line_height < pdata->sizes[i])
 636                 line_height = pdata->sizes[i];
 637         line_height *= 4096;
 638     }
 639
 640     spacewidth = (pdata->sizes[FONT_NORMAL] *
 641                   string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
 642     if (spacewidth == 0) {
 643         /*
 644          * A font without a space?! Disturbing. I hope this never
 645          * comes up, but I'll make a random guess anyway and set my
 646          * space width to half the point size.
 647          */
 648         spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
 649     }
 650
 651     /*
 652      * I'm going to set the _minimum_ space width to 3/5 of the
 653      * standard one, and use the standard one as the optimum.
 654      */
 655     ctx.minspacewidth = spacewidth * 3 / 5;
 656     ctx.pdata = pdata;
 657
 658     wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
 659
 660     /*
 661      * Having done the wrapping, we now concoct a set of line_data
 662      * structures.
 663      */
 664     pdata->first = pdata->last = NULL;
 665
 666     for (p = wrapping; p; p = p->next) {
 667         line_data *ldata;
 668         word *wd;
 669         int len, wid, spaces;
 670
 671         ldata = mknew(line_data);
 672
 673         ldata->pdata = pdata;
 674         ldata->first = p->begin;
 675         ldata->end = p->end;
 676         ldata->line_height = line_height;
 677
 678         ldata->xpos = (p == wrapping ? i1 : i2);
 679
 680         if (pdata->last) {
 681             pdata->last->next = ldata;
 682             ldata->prev = pdata->last;
 683         } else {
 684             pdata->first = ldata;
 685             ldata->prev = NULL;
 686         }
 687         ldata->next = NULL;
 688         pdata->last = ldata;
 689
 690         spaces = 0;
 691         len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
 692         wid = (p == wrapping ? w - i1 : w - i2);
 693         wd = ldata->first;
 694
 695         ldata->hshortfall = wid - len;
 696         ldata->nspaces = spaces;
 697         /*
 698          * This tells us how much the space width needs to
 699          * change from _min_spacewidth. But we want to store
 700          * its difference from the _natural_ space width, to
 701          * make the text rendering easier.
 702          */
 703         ldata->hshortfall += ctx.minspacewidth * spaces;
 704         ldata->hshortfall -= spacewidth * spaces;
 705         /*
 706          * Special case: on the last line of a paragraph, we
 707          * never stretch spaces.
 708          */
 709         if (ldata->hshortfall > 0 && !p->next)
 710             ldata->hshortfall = 0;
 711
 712         ldata->aux_text = NULL;
 713         ldata->aux_text_2 = NULL;
 714         ldata->aux_left_indent = 0;
 715     }
 716
 717 }
 718
 719 static page_data *page_breaks(line_data *first, line_data *last,
 720                               int page_height)
 721 {
 722     line_data *l, *m;
 723     page_data *ph, *pt;
 724
 725     /*
 726      * Page breaking is done by a close analogue of the optimal
 727      * paragraph wrapping algorithm used by wrap_para(). We work
 728      * backwards from the end of the document line by line; for
 729      * each line, we contemplate every possible number of lines we
 730      * could put on a page starting with that line, determine a
 731      * cost function for each one, add it to the pre-computed cost
 732      * function for optimally page-breaking everything after that
 733      * page, and pick the best option.
 734      *
 735      * Since my line_data structures are only used for this
 736      * purpose, I might as well just store the algorithm data
 737      * directly in them.
 738      */
 739
 740     for (l = last; l; l = l->prev) {
 741         int minheight, text = 0, space = 0;
 742         int cost;
 743
 744         l->bestcost = -1;
 745         for (m = l; m; m = m->next) {
 746             if (m != l && m->page_break)
 747                 break;                 /* we've gone as far as we can */
 748
 749             if (m != l)
 750                 space += m->prev->space_after;
 751             if (m != l || m->page_break)
 752                 space += m->space_before;
 753             text += m->line_height;
 754             minheight = text + space;
 755
 756             if (m != l && minheight > page_height)
 757                 break;
 758
 759             /*
 760              * Compute the cost of this arrangement, as the square
 761              * of the amount of wasted space on the page.
 762              * Exception: if this is the last page before a
 763              * mandatory break or the document end, we don't
 764              * penalise a large blank area.
 765              */
 766             if (m->next && !m->next->page_break)
 767             {
 768                 int x = page_height - minheight;
 769                 int xf;
 770
 771                 xf = x & 0xFF;
 772                 x >>= 8;
 773
 774                 cost = x*x;
 775                 cost += (x * xf) >> 8;
 776             } else
 777                 cost = 0;
 778
 779             /*
 780              * FIXME: here I should introduce penalties for
 781              * breaking in mid-paragraph, particularly very close
 782              * to one end of a paragraph and particularly in code
 783              * paragraphs.
 784              */
 785
 786             if (m->next && !m->next->page_break)
 787                 cost += m->next->bestcost;
 788
 789             if (l->bestcost == -1 || l->bestcost > cost) {
 790                 /*
 791                  * This is the best option yet for this starting
 792                  * point.
 793                  */
 794                 l->bestcost = cost;
 795                 if (m->next && !m->next->page_break)
 796                     l->vshortfall = page_height - minheight;
 797                 else
 798                     l->vshortfall = 0;
 799                 l->text = text;
 800                 l->space = space;
 801                 l->page_last = m;
 802             }
 803         }
 804     }
 805
 806     /*
 807      * Now go through the line list forwards and assemble the
 808      * actual pages.
 809      */
 810     ph = pt = NULL;
 811
 812     l = first;
 813     while (l) {
 814         page_data *page;
 815         int text, space;
 816
 817         page = mknew(page_data);
 818         page->next = NULL;
 819         page->prev = pt;
 820         if (pt)
 821             pt->next = page;
 822         else
 823             ph = page;
 824         pt = page;
 825
 826         page->first_line = l;
 827         page->last_line = l->page_last;
 828
 829         page->first_text = page->last_text = NULL;
 830
 831         page->first_xref = page->last_xref = NULL;
 832
 833         /*
 834          * Now assign a y-coordinate to each line on the page.
 835          */
 836         text = space = 0;
 837         for (l = page->first_line; l; l = l->next) {
 838             if (l != page->first_line)
 839                 space += l->prev->space_after;
 840             if (l != page->first_line || l->page_break)
 841                 space += l->space_before;
 842             text += l->line_height;
 843
 844             l->page = page;
 845             l->ypos = text + space +
 846                 space * (float)page->first_line->vshortfall /
 847                 page->first_line->space;
 848
 849             if (l == page->last_line)
 850                 break;
 851         }
 852
 853         l = page->last_line->next;
 854     }
 855
 856     return ph;
 857 }
 858
 859 static void add_string_to_page(page_data *page, int x, int y,
 860                                font_encoding *fe, int size, char *text)
 861 {
 862     text_fragment *frag;
 863
 864     frag = mknew(text_fragment);
 865     frag->next = NULL;
 866
 867     if (page->last_text)
 868         page->last_text->next = frag;
 869     else
 870         page->first_text = frag;
 871     page->last_text = frag;
 872
 873     frag->x = x;
 874     frag->y = y;
 875     frag->fe = fe;
 876     frag->fontsize = size;
 877     frag->text = dupstr(text);
 878 }
 879
 880 /*
 881  * Returns the updated x coordinate.
 882  */
 883 static int render_string(page_data *page, font_data *font, int fontsize,
 884                          int x, int y, wchar_t *str)
 885 {
 886     char *text;
 887     int textpos, textwid, glyph;
 888     font_encoding *subfont = NULL, *sf;
 889
 890     text = mknewa(char, 1 + ustrlen(str));
 891     textpos = textwid = 0;
 892
 893     while (*str) {
 894         glyph = font->bmp[*str];
 895
 896         if (glyph == 0xFFFF)
 897             continue;                  /* nothing more we can do here */
 898
 899         /*
 900          * Find which subfont this character is going in.
 901          */
 902         sf = font->subfont_map[glyph].subfont;
 903
 904         if (!sf) {
 905             int c;
 906
 907             /*
 908              * This character is not yet in a subfont. Assign one.
 909              */
 910             if (font->latest_subfont->free_pos >= 0x100)
 911                 font->latest_subfont = new_font_encoding(font);
 912
 913             c = font->latest_subfont->free_pos++;
 914             if (font->latest_subfont->free_pos == 0x7F)
 915                 font->latest_subfont->free_pos = 0xA1;
 916
 917             font->subfont_map[glyph].subfont = font->latest_subfont;
 918             font->subfont_map[glyph].position = c;
 919             font->latest_subfont->vector[c] = font->glyphs[glyph];
 920             font->latest_subfont->indices[c] = glyph;
 921             font->latest_subfont->to_unicode[c] = *str;
 922
 923             sf = font->latest_subfont;
 924         }
 925
 926         if (!subfont || sf != subfont) {
 927             if (subfont) {
 928                 text[textpos] = '\0';
 929                 add_string_to_page(page, x, y, subfont, fontsize, text);
 930                 x += textwid;
 931             } else {
 932                 assert(textpos == 0);
 933             }
 934             textpos = 0;
 935             subfont = sf;
 936         }
 937
 938         text[textpos++] = font->subfont_map[glyph].position;
 939         textwid += font->widths[glyph] * fontsize;
 940
 941         str++;
 942     }
 943
 944     if (textpos > 0) {
 945         text[textpos] = '\0';
 946         add_string_to_page(page, x, y, subfont, fontsize, text);
 947         x += textwid;
 948     }
 949
 950     return x;
 951 }
 952
 953 /*
 954  * Returns the updated x coordinate.
 955  */
 956 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
 957                        int x, int y, word *text, word *text_end, xref **xr,
 958                        int shortfall, int nspaces, int *nspace,
 959                        keywordlist *keywords)
 960 {
 961     while (text && text != text_end) {
 962         int style, type, findex, errs;
 963         wchar_t *str;
 964         xref_dest dest;
 965
 966         switch (text->type) {
 967             /*
 968              * Start a cross-reference.
 969              */
 970           case word_HyperLink:
 971           case word_UpperXref:
 972           case word_LowerXref:
 973
 974             if (text->type == word_HyperLink) {
 975                 dest.type = URL;
 976                 dest.url = utoa_dup(text->text);
 977                 dest.page = NULL;
 978             } else {
 979                 keyword *kwl = kw_lookup(keywords, text->text);
 980                 para_data *pdata;
 981
 982                 if (kwl) {
 983                     assert(kwl->para->private_data);
 984                     pdata = (para_data *) kwl->para->private_data;
 985                     dest.type = PAGE;
 986                     dest.page = pdata->first->page;
 987                     dest.url = NULL;
 988                 } else {
 989                     /*
 990                      * Shouldn't happen, but *shrug*
 991                      */
 992                     dest.type = NONE;
 993                     dest.page = NULL;
 994                     dest.url = NULL;
 995                 }
 996             }
 997             if (dest.type != NONE) {
 998                 *xr = mknew(xref);
 999                 (*xr)->dest = dest;    /* structure copy */
1000                 if (page->last_xref)
1001                     page->last_xref->next = *xr;
1002                 else
1003                     page->first_xref = *xr;
1004                 page->last_xref = *xr;
1005
1006                 /*
1007                  * FIXME: Ideally we should have, and use, some
1008                  * vertical font metric information here so that
1009                  * our cross-ref rectangle can take account of
1010                  * descenders and the font's cap height. This will
1011                  * do for the moment, but it isn't ideal.
1012                  */
1013                 (*xr)->lx = (*xr)->rx = x;
1014                 (*xr)->by = y;
1015                 (*xr)->ty = y + ldata->line_height;
1016             }
1017             goto nextword;
1018
1019             /*
1020              * Finish extending a cross-reference box.
1021              */
1022           case word_HyperEnd:
1023           case word_XrefEnd:
1024             *xr = NULL;
1025             goto nextword;
1026
1027           case word_IndexRef:
1028             goto nextword;
1029             /*
1030              * FIXME: we should do something with all of these!
1031              * Hyperlinks and xrefs have meaning in PDF, and this
1032              * is probably the right place to nail down the index
1033              * references too.
1034              */
1035         }
1036
1037         style = towordstyle(text->type);
1038         type = removeattr(text->type);
1039
1040         findex = (style == word_Normal ? FONT_NORMAL :
1041                   style == word_Emph ? FONT_EMPH :
1042                   FONT_CODE);
1043
1044         if (type == word_Normal) {
1045             str = text->text;
1046         } else if (type == word_WhiteSpace) {
1047             x += pdata->sizes[findex] *
1048                 string_width(pdata->fonts[findex], L" ", NULL);
1049             if (nspaces && findex != FONT_CODE) {
1050                 x += (*nspace+1) * shortfall / nspaces;
1051                 x -= *nspace * shortfall / nspaces;
1052                 (*nspace)++;
1053             }
1054             goto nextword;
1055         } else /* if (type == word_Quote) */ {
1056             if (text->aux == quote_Open)
1057                 str = L"\x2018";               /* FIXME: configurability! */
1058             else
1059                 str = L"\x2019";               /* FIXME: configurability! */
1060         }
1061
1062         (void) string_width(pdata->fonts[findex], str, &errs);
1063
1064         if (errs && text->alt)
1065             x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1066                             xr, shortfall, nspaces, nspace, keywords);
1067         else
1068             x = render_string(page, pdata->fonts[findex],
1069                               pdata->sizes[findex], x, y, str);
1070
1071         if (*xr)
1072             (*xr)->rx = x;
1073
1074         nextword:
1075         text = text->next;
1076     }
1077
1078     return x;
1079 }
1080
1081 static void render_line(line_data *ldata, int left_x, int top_y,
1082                         xref_dest *dest, keywordlist *keywords)
1083 {
1084     int nspace;
1085     xref *xr;
1086
1087     if (ldata->aux_text) {
1088         int x;
1089         xr = NULL;
1090         nspace = 0;
1091         x = render_text(ldata->page, ldata->pdata, ldata,
1092                         left_x + ldata->aux_left_indent,
1093                         top_y - ldata->ypos,
1094                         ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1095         if (ldata->aux_text_2)
1096             render_text(ldata->page, ldata->pdata, ldata,
1097                         x, top_y - ldata->ypos,
1098                         ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1099     }
1100     nspace = 0;
1101
1102     /*
1103      * There might be a cross-reference carried over from a
1104      * previous line.
1105      */
1106     if (dest->type != NONE) {
1107         xr = mknew(xref);
1108         xr->dest = *dest;    /* structure copy */
1109         if (ldata->page->last_xref)
1110             ldata->page->last_xref->next = xr;
1111         else
1112             ldata->page->first_xref = xr;
1113         ldata->page->last_xref = xr;
1114         xr->lx = xr->rx = left_x + ldata->xpos;
1115         xr->by = top_y - ldata->ypos;
1116         xr->ty = top_y - ldata->ypos + ldata->line_height;
1117     } else
1118         xr = NULL;
1119
1120     render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1121                 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1122                 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1123
1124     if (xr) {
1125         /*
1126          * There's a cross-reference continued on to the next line.
1127          */
1128         *dest = xr->dest;
1129     } else
1130         dest->type = NONE;
1131 }
1132
1133 static void code_paragraph(para_data *pdata,
1134                            font_data *fn, font_data *fi, font_data *fb,
1135                            int font_size, int indent, word *words)
1136 {
1137     /*
1138      * For code paragraphs, I'm going to hack grievously and
1139      * pretend the three normal fonts are the three code paragraph
1140      * fonts.
1141      */
1142     pdata->fonts[FONT_NORMAL] = fb;
1143     pdata->fonts[FONT_EMPH] = fi;
1144     pdata->fonts[FONT_CODE] = fn;
1145     pdata->sizes[FONT_NORMAL] =
1146         pdata->sizes[FONT_EMPH] =
1147         pdata->sizes[FONT_CODE] = font_size;
1148
1149     pdata->first = pdata->last = NULL;
1150
1151     for (; words; words = words->next) {
1152         wchar_t *t, *e, *start;
1153         word *lhead = NULL, *ltail = NULL, *w;
1154         line_data *ldata;
1155         int prev = -1, curr;
1156
1157         t = words->text;
1158         if (words->next && words->next->type == word_Emph) {
1159             e = words->next->text;
1160             words = words->next;
1161         } else
1162             e = NULL;
1163
1164         start = t;
1165
1166         while (*start) {
1167             while (*t) {
1168                 if (!e || !*e)
1169                     curr = 0;
1170                 else if (*e == L'i')
1171                     curr = 1;
1172                 else if (*e == L'b')
1173                     curr = 2;
1174                 else
1175                     curr = 0;
1176
1177                 if (prev < 0)
1178                     prev = curr;
1179
1180                 if (curr != prev)
1181                     break;
1182
1183                 t++;
1184                 if (e && *e)
1185                     e++;
1186             }
1187
1188             /*
1189              * We've isolated a maximal subsequence of the line
1190              * which has the same emphasis. Form it into a word
1191              * structure.
1192              */
1193             w = mknew(word);
1194             w->next = NULL;
1195             w->alt = NULL;
1196             w->type = (prev == 0 ? word_WeakCode :
1197                       prev == 1 ? word_Emph : word_Normal);
1198             w->text = mknewa(wchar_t, t-start+1);
1199             memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1200             w->text[t-start] = '\0';
1201             w->breaks = FALSE;
1202
1203             if (ltail)
1204                 ltail->next = w;
1205             else
1206                 lhead = w;
1207             ltail = w;
1208
1209             start = t;
1210             prev = -1;
1211         }
1212
1213         ldata = mknew(line_data);
1214
1215         ldata->pdata = pdata;
1216         ldata->first = lhead;
1217         ldata->end = NULL;
1218         ldata->line_height = font_size * 4096;
1219
1220         ldata->xpos = indent;
1221
1222         if (pdata->last) {
1223             pdata->last->next = ldata;
1224             ldata->prev = pdata->last;
1225         } else {
1226             pdata->first = ldata;
1227             ldata->prev = NULL;
1228         }
1229         ldata->next = NULL;
1230         pdata->last = ldata;
1231
1232         ldata->hshortfall = 0;
1233         ldata->nspaces = 0;
1234         ldata->aux_text = NULL;
1235         ldata->aux_text_2 = NULL;
1236         ldata->aux_left_indent = 0;
1237
1238     }
1239 }