Fine-tuned the page breaking algorithm by adding penalties and
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - implement some simple graphics
16 * * I had an underline below chapter headings in the original
17 * Perl version, and I thought it looked rather nice
18 * * also we need para_Rule.
19 *
20 * - set up contents section now we know what sections begin on
21 * which pages
22 *
23 * - do PDF outline
24 *
25 * - index
26 *
27 * - header/footer? Page numbers at least would be handy. Fully
28 * configurable footer can wait, though.
29 *
30 * That should bring us to the same level of functionality that
31 * original-Halibut had, and the same in PDF plus the obvious
32 * interactive navigation features. After that, in future work:
33 *
34 * - linearised PDF, perhaps?
35 *
36 * - I'm uncertain of whether I need to include a ToUnicode CMap
37 * in each of my font definitions in PDF. Currently things (by
38 * which I mean cut and paste out of acroread) seem to be
39 * working fairly happily without it, but I don't know.
40 *
41 * - configurability
42 *
43 * - title pages
44 */
45
46 #include <assert.h>
47 #include <stdio.h>
48
49 #include "halibut.h"
50 #include "paper.h"
51
52 static font_data *make_std_font(font_list *fontlist, char const *name);
53 static void wrap_paragraph(para_data *pdata, word *words,
54 int w, int i1, int i2);
55 static page_data *page_breaks(line_data *first, line_data *last,
56 int page_height);
57 static void render_line(line_data *ldata, int left_x, int top_y,
58 xref_dest *dest, keywordlist *keywords);
59 static int paper_width_simple(para_data *pdata, word *text);
60 static void code_paragraph(para_data *pdata,
61 font_data *fn, font_data *fi, font_data *fb,
62 int font_size, int indent, word *words);
63
64 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
65 indexdata *idx) {
66 paragraph *p;
67 document *doc;
68 int indent, extra_indent, firstline_indent, aux_indent;
69 para_data *pdata;
70 line_data *ldata, *firstline, *lastline;
71 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
72 page_data *pages;
73 font_list *fontlist;
74 word *aux, *aux2;
75
76 /*
77 * FIXME: All these things ought to become configurable.
78 */
79 int paper_width = 595 * 4096;
80 int paper_height = 841 * 4096;
81 int left_margin = 72 * 4096;
82 int top_margin = 72 * 4096;
83 int right_margin = 72 * 4096;
84 int bottom_margin = 108 * 4096;
85 int indent_list_bullet = 6 * 4096;
86 int indent_list = 24 * 4096;
87 int indent_quote = 18 * 4096;
88 int base_leading = 4096;
89 int base_para_spacing = 10 * 4096;
90 int chapter_top_space = 72 * 4096;
91 int sect_num_left_space = 12 * 4096;
92
93 int base_width = paper_width - left_margin - right_margin;
94 int page_height = paper_height - top_margin - bottom_margin;
95
96 IGNORE(idx); /* FIXME */
97
98 /*
99 * First, set up some font structures.
100 */
101 fontlist = mknew(font_list);
102 fontlist->head = fontlist->tail = NULL;
103 tr = make_std_font(fontlist, "Times-Roman");
104 ti = make_std_font(fontlist, "Times-Italic");
105 hr = make_std_font(fontlist, "Helvetica-Bold");
106 hi = make_std_font(fontlist, "Helvetica-BoldOblique");
107 cr = make_std_font(fontlist, "Courier");
108 co = make_std_font(fontlist, "Courier-Oblique");
109 cb = make_std_font(fontlist, "Courier-Bold");
110
111 /*
112 * Go through and break up each paragraph into lines.
113 */
114 indent = 0;
115 firstline = lastline = NULL;
116 for (p = sourceform; p; p = p->next) {
117 p->private_data = NULL;
118
119 switch (p->type) {
120 /*
121 * These paragraph types are either invisible or don't
122 * define text in the normal sense. Either way, they
123 * don't require wrapping.
124 */
125 case para_IM:
126 case para_BR:
127 case para_Rule:
128 case para_Biblio:
129 case para_NotParaType:
130 case para_Config:
131 case para_VersionID:
132 case para_NoCite:
133 break;
134
135 /*
136 * These paragraph types don't require wrapping, but
137 * they do affect the line width to which we wrap the
138 * rest of the paragraphs, so we need to pay attention.
139 */
140 case para_LcontPush:
141 indent += indent_list; break;
142 case para_LcontPop:
143 indent -= indent_list; assert(indent >= 0); break;
144 case para_QuotePush:
145 indent += indent_quote; break;
146 case para_QuotePop:
147 indent -= indent_quote; assert(indent >= 0); break;
148
149 /*
150 * This paragraph type is special. Process it
151 * specially.
152 */
153 case para_Code:
154 pdata = mknew(para_data);
155 code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
156 p->private_data = pdata;
157 if (pdata->first != pdata->last) {
158 pdata->first->penalty_after += 100000;
159 pdata->last->penalty_before += 100000;
160 }
161 break;
162
163 /*
164 * All of these paragraph types require wrapping in the
165 * ordinary way. So we must supply a set of fonts, a
166 * line width and auxiliary information (e.g. bullet
167 * text) for each one.
168 */
169 case para_Chapter:
170 case para_Appendix:
171 case para_UnnumberedChapter:
172 case para_Heading:
173 case para_Subsect:
174 case para_Normal:
175 case para_BiblioCited:
176 case para_Bullet:
177 case para_NumberedList:
178 case para_DescribedThing:
179 case para_Description:
180 case para_Copyright:
181 case para_Title:
182 pdata = mknew(para_data);
183
184 /*
185 * Choose fonts for this paragraph.
186 *
187 * FIXME: All of this ought to be completely
188 * user-configurable.
189 */
190 switch (p->type) {
191 case para_Title:
192 pdata->fonts[FONT_NORMAL] = hr;
193 pdata->sizes[FONT_NORMAL] = 24;
194 pdata->fonts[FONT_EMPH] = hi;
195 pdata->sizes[FONT_EMPH] = 24;
196 pdata->fonts[FONT_CODE] = cb;
197 pdata->sizes[FONT_CODE] = 24;
198 break;
199
200 case para_Chapter:
201 case para_Appendix:
202 case para_UnnumberedChapter:
203 pdata->fonts[FONT_NORMAL] = hr;
204 pdata->sizes[FONT_NORMAL] = 20;
205 pdata->fonts[FONT_EMPH] = hi;
206 pdata->sizes[FONT_EMPH] = 20;
207 pdata->fonts[FONT_CODE] = cb;
208 pdata->sizes[FONT_CODE] = 20;
209 break;
210
211 case para_Heading:
212 case para_Subsect:
213 pdata->fonts[FONT_NORMAL] = hr;
214 pdata->fonts[FONT_EMPH] = hi;
215 pdata->fonts[FONT_CODE] = cb;
216 pdata->sizes[FONT_NORMAL] =
217 pdata->sizes[FONT_EMPH] =
218 pdata->sizes[FONT_CODE] =
219 (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
220 break;
221
222 case para_Normal:
223 case para_BiblioCited:
224 case para_Bullet:
225 case para_NumberedList:
226 case para_DescribedThing:
227 case para_Description:
228 case para_Copyright:
229 pdata->fonts[FONT_NORMAL] = tr;
230 pdata->sizes[FONT_NORMAL] = 12;
231 pdata->fonts[FONT_EMPH] = ti;
232 pdata->sizes[FONT_EMPH] = 12;
233 pdata->fonts[FONT_CODE] = cr;
234 pdata->sizes[FONT_CODE] = 12;
235 break;
236 }
237
238 /*
239 * Also select an indentation level depending on the
240 * paragraph type (list paragraphs other than
241 * para_DescribedThing need extra indent).
242 *
243 * (FIXME: Perhaps at some point we might even arrange
244 * for the user to be able to request indented first
245 * lines in paragraphs.)
246 */
247 if (p->type == para_Bullet ||
248 p->type == para_NumberedList ||
249 p->type == para_Description) {
250 extra_indent = firstline_indent = indent_list;
251 } else {
252 extra_indent = firstline_indent = 0;
253 }
254
255 /*
256 * Find the auxiliary text for this paragraph.
257 */
258 aux = aux2 = NULL;
259 aux_indent = 0;
260
261 switch (p->type) {
262 case para_Chapter:
263 case para_Appendix:
264 case para_Heading:
265 case para_Subsect:
266 /*
267 * For some heading styles (FIXME: be able to
268 * configure which), the auxiliary text contains
269 * the chapter number and is arranged to be
270 * right-aligned a few points left of the primary
271 * margin. For other styles, the auxiliary text is
272 * the full chapter _name_ and takes up space
273 * within the (wrapped) chapter title, meaning that
274 * we must move the first line indent over to make
275 * space for it.
276 */
277 if (p->type == para_Heading || p->type == para_Subsect) {
278 int len;
279
280 aux = p->kwtext2;
281 len = paper_width_simple(pdata, p->kwtext2);
282 aux_indent = -len - sect_num_left_space;
283 } else {
284 aux = p->kwtext;
285 aux2 = mknew(word);
286 aux2->next = NULL;
287 aux2->alt = NULL;
288 aux2->type = word_Normal;
289 aux2->text = ustrdup(L": ");
290 aux2->breaks = FALSE;
291 aux2->aux = 0;
292 aux_indent = 0;
293
294 firstline_indent += paper_width_simple(pdata, aux);
295 firstline_indent += paper_width_simple(pdata, aux2);
296 }
297 break;
298
299 case para_Bullet:
300 /*
301 * Auxiliary text consisting of a bullet. (FIXME:
302 * configurable bullet.)
303 */
304 aux = mknew(word);
305 aux->next = NULL;
306 aux->alt = NULL;
307 aux->type = word_Normal;
308 aux->text = ustrdup(L"\x2022");
309 aux->breaks = FALSE;
310 aux->aux = 0;
311 aux_indent = indent + indent_list_bullet;
312 break;
313
314 case para_NumberedList:
315 /*
316 * Auxiliary text consisting of the number followed
317 * by a (FIXME: configurable) full stop.
318 */
319 aux = p->kwtext;
320 aux2 = mknew(word);
321 aux2->next = NULL;
322 aux2->alt = NULL;
323 aux2->type = word_Normal;
324 aux2->text = ustrdup(L".");
325 aux2->breaks = FALSE;
326 aux2->aux = 0;
327 aux_indent = indent + indent_list_bullet;
328 break;
329
330 case para_BiblioCited:
331 /*
332 * Auxiliary text consisting of the bibliography
333 * reference text, and a trailing space.
334 */
335 aux = p->kwtext;
336 aux2 = mknew(word);
337 aux2->next = NULL;
338 aux2->alt = NULL;
339 aux2->type = word_Normal;
340 aux2->text = ustrdup(L" ");
341 aux2->breaks = FALSE;
342 aux2->aux = 0;
343 aux_indent = indent;
344 firstline_indent += paper_width_simple(pdata, aux);
345 firstline_indent += paper_width_simple(pdata, aux2);
346 break;
347 }
348
349 wrap_paragraph(pdata, p->words, base_width,
350 indent + firstline_indent,
351 indent + extra_indent);
352
353 p->private_data = pdata;
354
355 pdata->first->aux_text = aux;
356 pdata->first->aux_text_2 = aux2;
357 pdata->first->aux_left_indent = aux_indent;
358
359 /*
360 * Line breaking penalties.
361 */
362 switch (p->type) {
363 case para_Chapter:
364 case para_Appendix:
365 case para_Heading:
366 case para_Subsect:
367 case para_UnnumberedChapter:
368 /*
369 * Fixed and large penalty for breaking straight
370 * after a heading; corresponding bonus for
371 * breaking straight before.
372 */
373 pdata->first->penalty_before = -500000;
374 pdata->last->penalty_after = 500000;
375 for (ldata = pdata->first; ldata; ldata = ldata->next)
376 ldata->penalty_after = 500000;
377 break;
378
379 case para_DescribedThing:
380 /*
381 * This is treated a bit like a small heading:
382 * there's a penalty for breaking after it (i.e.
383 * between it and its description), and a bonus for
384 * breaking before it (actually _between_ list
385 * items).
386 */
387 pdata->first->penalty_before = -200000;
388 pdata->last->penalty_after = 200000;
389 break;
390
391 default:
392 /*
393 * Most paragraph types: widow/orphan control by
394 * discouraging breaking one line from the end of
395 * any paragraph.
396 */
397 if (pdata->first != pdata->last) {
398 pdata->first->penalty_after = 100000;
399 pdata->last->penalty_before = 100000;
400 }
401 break;
402 }
403
404 break;
405 }
406
407 if (p->private_data) {
408 pdata = (para_data *)p->private_data;
409
410 /*
411 * Set the line spacing for each line in this paragraph.
412 */
413 for (ldata = pdata->first; ldata; ldata = ldata->next) {
414 if (ldata == pdata->first)
415 ldata->space_before = base_para_spacing / 2;
416 else
417 ldata->space_before = base_leading / 2;
418 if (ldata == pdata->last)
419 ldata->space_after = base_para_spacing / 2;
420 else
421 ldata->space_after = base_leading / 2;
422 ldata->page_break = FALSE;
423 }
424
425 /*
426 * Some kinds of section heading do require a page
427 * break before them.
428 */
429 if (p->type == para_Title ||
430 p->type == para_Chapter ||
431 p->type == para_Appendix ||
432 p->type == para_UnnumberedChapter) {
433 pdata->first->page_break = TRUE;
434 pdata->first->space_before = chapter_top_space;
435 }
436
437 /*
438 * Link all line structures together into a big list.
439 */
440 if (pdata->first) {
441 if (lastline) {
442 lastline->next = pdata->first;
443 pdata->first->prev = lastline;
444 } else {
445 firstline = pdata->first;
446 pdata->first->prev = NULL;
447 }
448 lastline = pdata->last;
449 }
450 }
451 }
452
453 /*
454 * Now we have an enormous linked list of every line of text in
455 * the document. Break it up into pages.
456 */
457 pages = page_breaks(firstline, lastline, page_height);
458
459 /*
460 * Now we're ready to actually lay out the pages. We do this by
461 * looping over _paragraphs_, since we may need to track cross-
462 * references between lines and even across pages.
463 */
464 for (p = sourceform; p; p = p->next) {
465 pdata = (para_data *)p->private_data;
466
467 if (pdata) {
468 xref_dest dest;
469 dest.type = NONE;
470 for (ldata = pdata->first; ldata; ldata = ldata->next) {
471 render_line(ldata, left_margin, paper_height - top_margin,
472 &dest, keywords);
473 if (ldata == pdata->last)
474 break;
475 }
476 }
477 }
478
479 doc = mknew(document);
480 doc->fonts = fontlist;
481 doc->pages = pages;
482 doc->paper_width = paper_width;
483 doc->paper_height = paper_height;
484 return doc;
485 }
486
487 static font_encoding *new_font_encoding(font_data *font)
488 {
489 font_encoding *fe;
490 int i;
491
492 fe = mknew(font_encoding);
493 fe->next = NULL;
494
495 if (font->list->tail)
496 font->list->tail->next = fe;
497 else
498 font->list->head = fe;
499 font->list->tail = fe;
500
501 fe->font = font;
502 fe->free_pos = 0x21;
503
504 for (i = 0; i < 256; i++) {
505 fe->vector[i] = NULL;
506 fe->indices[i] = -1;
507 fe->to_unicode[i] = 0xFFFF;
508 }
509
510 return fe;
511 }
512
513 static font_data *make_std_font(font_list *fontlist, char const *name)
514 {
515 const int *widths;
516 int nglyphs;
517 font_data *f;
518 font_encoding *fe;
519 int i;
520
521 widths = ps_std_font_widths(name);
522 if (!widths)
523 return NULL;
524
525 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
526
527 f = mknew(font_data);
528
529 f->list = fontlist;
530 f->name = name;
531 f->nglyphs = nglyphs;
532 f->glyphs = ps_std_glyphs;
533 f->widths = widths;
534 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
535
536 /*
537 * Our first subfont will contain all of US-ASCII. This isn't
538 * really necessary - we could just create custom subfonts
539 * precisely as the whim of render_string dictated - but
540 * instinct suggests that it might be nice to have the text in
541 * the output files look _marginally_ recognisable.
542 */
543 fe = new_font_encoding(f);
544 fe->free_pos = 0xA1; /* only the top half is free */
545 f->latest_subfont = fe;
546
547 for (i = 0; i < (int)lenof(f->bmp); i++)
548 f->bmp[i] = 0xFFFF;
549
550 for (i = 0; i < nglyphs; i++) {
551 wchar_t ucs;
552 ucs = ps_glyph_to_unicode(f->glyphs[i]);
553 assert(ucs != 0xFFFF);
554 f->bmp[ucs] = i;
555 if (ucs >= 0x20 && ucs <= 0x7E) {
556 fe->vector[ucs] = f->glyphs[i];
557 fe->indices[ucs] = i;
558 fe->to_unicode[ucs] = ucs;
559 f->subfont_map[i].subfont = fe;
560 f->subfont_map[i].position = ucs;
561 } else {
562 /*
563 * This character is not yet assigned to a subfont.
564 */
565 f->subfont_map[i].subfont = NULL;
566 f->subfont_map[i].position = 0;
567 }
568 }
569
570 return f;
571 }
572
573 static int string_width(font_data *font, wchar_t const *string, int *errs)
574 {
575 int width = 0;
576
577 if (errs)
578 *errs = 0;
579
580 for (; *string; string++) {
581 int index;
582
583 index = font->bmp[(unsigned short)*string];
584 if (index == 0xFFFF) {
585 if (errs)
586 *errs = 1;
587 } else {
588 width += font->widths[index];
589 }
590 }
591
592 return width;
593 }
594
595 static int paper_width_internal(void *vctx, word *word, int *nspaces);
596
597 struct paper_width_ctx {
598 int minspacewidth;
599 para_data *pdata;
600 };
601
602 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
603 int w = 0;
604 while (text && text != end) {
605 w += paper_width_internal(vctx, text, nspaces);
606 text = text->next;
607 }
608 return w;
609 }
610
611 static int paper_width_internal(void *vctx, word *word, int *nspaces)
612 {
613 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
614 int style, type, findex, width, errs;
615 wchar_t *str;
616
617 switch (word->type) {
618 case word_HyperLink:
619 case word_HyperEnd:
620 case word_UpperXref:
621 case word_LowerXref:
622 case word_XrefEnd:
623 case word_IndexRef:
624 return 0;
625 }
626
627 style = towordstyle(word->type);
628 type = removeattr(word->type);
629
630 findex = (style == word_Normal ? FONT_NORMAL :
631 style == word_Emph ? FONT_EMPH :
632 FONT_CODE);
633
634 if (type == word_Normal) {
635 str = word->text;
636 } else if (type == word_WhiteSpace) {
637 if (findex != FONT_CODE) {
638 if (nspaces)
639 (*nspaces)++;
640 return ctx->minspacewidth;
641 } else
642 str = L" ";
643 } else /* if (type == word_Quote) */ {
644 if (word->aux == quote_Open)
645 str = L"\x2018"; /* FIXME: configurability! */
646 else
647 str = L"\x2019"; /* FIXME: configurability! */
648 }
649
650 width = string_width(ctx->pdata->fonts[findex], str, &errs);
651
652 if (errs && word->alt)
653 return paper_width_list(vctx, word->alt, NULL, nspaces);
654 else
655 return ctx->pdata->sizes[findex] * width;
656 }
657
658 static int paper_width(void *vctx, word *word)
659 {
660 return paper_width_internal(vctx, word, NULL);
661 }
662
663 static int paper_width_simple(para_data *pdata, word *text)
664 {
665 struct paper_width_ctx ctx;
666
667 ctx.pdata = pdata;
668 ctx.minspacewidth =
669 (pdata->sizes[FONT_NORMAL] *
670 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
671
672 return paper_width_list(&ctx, text, NULL, NULL);
673 }
674
675 static void wrap_paragraph(para_data *pdata, word *words,
676 int w, int i1, int i2)
677 {
678 wrappedline *wrapping, *p;
679 int spacewidth;
680 struct paper_width_ctx ctx;
681 int line_height;
682
683 /*
684 * We're going to need to store the line height in every line
685 * structure we generate.
686 */
687 {
688 int i;
689 line_height = 0;
690 for (i = 0; i < NFONTS; i++)
691 if (line_height < pdata->sizes[i])
692 line_height = pdata->sizes[i];
693 line_height *= 4096;
694 }
695
696 spacewidth = (pdata->sizes[FONT_NORMAL] *
697 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
698 if (spacewidth == 0) {
699 /*
700 * A font without a space?! Disturbing. I hope this never
701 * comes up, but I'll make a random guess anyway and set my
702 * space width to half the point size.
703 */
704 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
705 }
706
707 /*
708 * I'm going to set the _minimum_ space width to 3/5 of the
709 * standard one, and use the standard one as the optimum.
710 */
711 ctx.minspacewidth = spacewidth * 3 / 5;
712 ctx.pdata = pdata;
713
714 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
715
716 /*
717 * Having done the wrapping, we now concoct a set of line_data
718 * structures.
719 */
720 pdata->first = pdata->last = NULL;
721
722 for (p = wrapping; p; p = p->next) {
723 line_data *ldata;
724 word *wd;
725 int len, wid, spaces;
726
727 ldata = mknew(line_data);
728
729 ldata->pdata = pdata;
730 ldata->first = p->begin;
731 ldata->end = p->end;
732 ldata->line_height = line_height;
733
734 ldata->xpos = (p == wrapping ? i1 : i2);
735
736 if (pdata->last) {
737 pdata->last->next = ldata;
738 ldata->prev = pdata->last;
739 } else {
740 pdata->first = ldata;
741 ldata->prev = NULL;
742 }
743 ldata->next = NULL;
744 pdata->last = ldata;
745
746 spaces = 0;
747 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
748 wid = (p == wrapping ? w - i1 : w - i2);
749 wd = ldata->first;
750
751 ldata->hshortfall = wid - len;
752 ldata->nspaces = spaces;
753 /*
754 * This tells us how much the space width needs to
755 * change from _min_spacewidth. But we want to store
756 * its difference from the _natural_ space width, to
757 * make the text rendering easier.
758 */
759 ldata->hshortfall += ctx.minspacewidth * spaces;
760 ldata->hshortfall -= spacewidth * spaces;
761 /*
762 * Special case: on the last line of a paragraph, we
763 * never stretch spaces.
764 */
765 if (ldata->hshortfall > 0 && !p->next)
766 ldata->hshortfall = 0;
767
768 ldata->aux_text = NULL;
769 ldata->aux_text_2 = NULL;
770 ldata->aux_left_indent = 0;
771 ldata->penalty_before = ldata->penalty_after = 0;
772 }
773
774 }
775
776 static page_data *page_breaks(line_data *first, line_data *last,
777 int page_height)
778 {
779 line_data *l, *m;
780 page_data *ph, *pt;
781
782 /*
783 * Page breaking is done by a close analogue of the optimal
784 * paragraph wrapping algorithm used by wrap_para(). We work
785 * backwards from the end of the document line by line; for
786 * each line, we contemplate every possible number of lines we
787 * could put on a page starting with that line, determine a
788 * cost function for each one, add it to the pre-computed cost
789 * function for optimally page-breaking everything after that
790 * page, and pick the best option.
791 *
792 * Since my line_data structures are only used for this
793 * purpose, I might as well just store the algorithm data
794 * directly in them.
795 */
796
797 for (l = last; l; l = l->prev) {
798 int minheight, text = 0, space = 0;
799 int cost;
800
801 l->bestcost = -1;
802 for (m = l; m; m = m->next) {
803 if (m != l && m->page_break)
804 break; /* we've gone as far as we can */
805
806 if (m != l)
807 space += m->prev->space_after;
808 if (m != l || m->page_break)
809 space += m->space_before;
810 text += m->line_height;
811 minheight = text + space;
812
813 if (m != l && minheight > page_height)
814 break;
815
816 /*
817 * Compute the cost of this arrangement, as the square
818 * of the amount of wasted space on the page.
819 * Exception: if this is the last page before a
820 * mandatory break or the document end, we don't
821 * penalise a large blank area.
822 */
823 if (m->next && !m->next->page_break)
824 {
825 int x = page_height - minheight;
826 int xf;
827
828 xf = x & 0xFF;
829 x >>= 8;
830
831 cost = x*x;
832 cost += (x * xf) >> 8;
833 } else
834 cost = 0;
835
836 if (m->next && !m->next->page_break) {
837 cost += m->penalty_after;
838 cost += m->next->penalty_before;
839 }
840
841 if (m->next && !m->next->page_break)
842 cost += m->next->bestcost;
843 if (l->bestcost == -1 || l->bestcost > cost) {
844 /*
845 * This is the best option yet for this starting
846 * point.
847 */
848 l->bestcost = cost;
849 if (m->next && !m->next->page_break)
850 l->vshortfall = page_height - minheight;
851 else
852 l->vshortfall = 0;
853 l->text = text;
854 l->space = space;
855 l->page_last = m;
856 }
857 }
858 }
859
860 /*
861 * Now go through the line list forwards and assemble the
862 * actual pages.
863 */
864 ph = pt = NULL;
865
866 l = first;
867 while (l) {
868 page_data *page;
869 int text, space;
870
871 page = mknew(page_data);
872 page->next = NULL;
873 page->prev = pt;
874 if (pt)
875 pt->next = page;
876 else
877 ph = page;
878 pt = page;
879
880 page->first_line = l;
881 page->last_line = l->page_last;
882
883 page->first_text = page->last_text = NULL;
884
885 page->first_xref = page->last_xref = NULL;
886
887 /*
888 * Now assign a y-coordinate to each line on the page.
889 */
890 text = space = 0;
891 for (l = page->first_line; l; l = l->next) {
892 if (l != page->first_line)
893 space += l->prev->space_after;
894 if (l != page->first_line || l->page_break)
895 space += l->space_before;
896 text += l->line_height;
897
898 l->page = page;
899 l->ypos = text + space +
900 space * (float)page->first_line->vshortfall /
901 page->first_line->space;
902
903 if (l == page->last_line)
904 break;
905 }
906
907 l = page->last_line->next;
908 }
909
910 return ph;
911 }
912
913 static void add_string_to_page(page_data *page, int x, int y,
914 font_encoding *fe, int size, char *text)
915 {
916 text_fragment *frag;
917
918 frag = mknew(text_fragment);
919 frag->next = NULL;
920
921 if (page->last_text)
922 page->last_text->next = frag;
923 else
924 page->first_text = frag;
925 page->last_text = frag;
926
927 frag->x = x;
928 frag->y = y;
929 frag->fe = fe;
930 frag->fontsize = size;
931 frag->text = dupstr(text);
932 }
933
934 /*
935 * Returns the updated x coordinate.
936 */
937 static int render_string(page_data *page, font_data *font, int fontsize,
938 int x, int y, wchar_t *str)
939 {
940 char *text;
941 int textpos, textwid, glyph;
942 font_encoding *subfont = NULL, *sf;
943
944 text = mknewa(char, 1 + ustrlen(str));
945 textpos = textwid = 0;
946
947 while (*str) {
948 glyph = font->bmp[*str];
949
950 if (glyph == 0xFFFF)
951 continue; /* nothing more we can do here */
952
953 /*
954 * Find which subfont this character is going in.
955 */
956 sf = font->subfont_map[glyph].subfont;
957
958 if (!sf) {
959 int c;
960
961 /*
962 * This character is not yet in a subfont. Assign one.
963 */
964 if (font->latest_subfont->free_pos >= 0x100)
965 font->latest_subfont = new_font_encoding(font);
966
967 c = font->latest_subfont->free_pos++;
968 if (font->latest_subfont->free_pos == 0x7F)
969 font->latest_subfont->free_pos = 0xA1;
970
971 font->subfont_map[glyph].subfont = font->latest_subfont;
972 font->subfont_map[glyph].position = c;
973 font->latest_subfont->vector[c] = font->glyphs[glyph];
974 font->latest_subfont->indices[c] = glyph;
975 font->latest_subfont->to_unicode[c] = *str;
976
977 sf = font->latest_subfont;
978 }
979
980 if (!subfont || sf != subfont) {
981 if (subfont) {
982 text[textpos] = '\0';
983 add_string_to_page(page, x, y, subfont, fontsize, text);
984 x += textwid;
985 } else {
986 assert(textpos == 0);
987 }
988 textpos = 0;
989 subfont = sf;
990 }
991
992 text[textpos++] = font->subfont_map[glyph].position;
993 textwid += font->widths[glyph] * fontsize;
994
995 str++;
996 }
997
998 if (textpos > 0) {
999 text[textpos] = '\0';
1000 add_string_to_page(page, x, y, subfont, fontsize, text);
1001 x += textwid;
1002 }
1003
1004 return x;
1005 }
1006
1007 /*
1008 * Returns the updated x coordinate.
1009 */
1010 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
1011 int x, int y, word *text, word *text_end, xref **xr,
1012 int shortfall, int nspaces, int *nspace,
1013 keywordlist *keywords)
1014 {
1015 while (text && text != text_end) {
1016 int style, type, findex, errs;
1017 wchar_t *str;
1018 xref_dest dest;
1019
1020 switch (text->type) {
1021 /*
1022 * Start a cross-reference.
1023 */
1024 case word_HyperLink:
1025 case word_UpperXref:
1026 case word_LowerXref:
1027
1028 if (text->type == word_HyperLink) {
1029 dest.type = URL;
1030 dest.url = utoa_dup(text->text);
1031 dest.page = NULL;
1032 } else {
1033 keyword *kwl = kw_lookup(keywords, text->text);
1034 para_data *pdata;
1035
1036 if (kwl) {
1037 assert(kwl->para->private_data);
1038 pdata = (para_data *) kwl->para->private_data;
1039 dest.type = PAGE;
1040 dest.page = pdata->first->page;
1041 dest.url = NULL;
1042 } else {
1043 /*
1044 * Shouldn't happen, but *shrug*
1045 */
1046 dest.type = NONE;
1047 dest.page = NULL;
1048 dest.url = NULL;
1049 }
1050 }
1051 if (dest.type != NONE) {
1052 *xr = mknew(xref);
1053 (*xr)->dest = dest; /* structure copy */
1054 if (page->last_xref)
1055 page->last_xref->next = *xr;
1056 else
1057 page->first_xref = *xr;
1058 page->last_xref = *xr;
1059
1060 /*
1061 * FIXME: Ideally we should have, and use, some
1062 * vertical font metric information here so that
1063 * our cross-ref rectangle can take account of
1064 * descenders and the font's cap height. This will
1065 * do for the moment, but it isn't ideal.
1066 */
1067 (*xr)->lx = (*xr)->rx = x;
1068 (*xr)->by = y;
1069 (*xr)->ty = y + ldata->line_height;
1070 }
1071 goto nextword;
1072
1073 /*
1074 * Finish extending a cross-reference box.
1075 */
1076 case word_HyperEnd:
1077 case word_XrefEnd:
1078 *xr = NULL;
1079 goto nextword;
1080
1081 case word_IndexRef:
1082 goto nextword;
1083 /*
1084 * FIXME: we should do something with this.
1085 */
1086 }
1087
1088 style = towordstyle(text->type);
1089 type = removeattr(text->type);
1090
1091 findex = (style == word_Normal ? FONT_NORMAL :
1092 style == word_Emph ? FONT_EMPH :
1093 FONT_CODE);
1094
1095 if (type == word_Normal) {
1096 str = text->text;
1097 } else if (type == word_WhiteSpace) {
1098 x += pdata->sizes[findex] *
1099 string_width(pdata->fonts[findex], L" ", NULL);
1100 if (nspaces && findex != FONT_CODE) {
1101 x += (*nspace+1) * shortfall / nspaces;
1102 x -= *nspace * shortfall / nspaces;
1103 (*nspace)++;
1104 }
1105 goto nextword;
1106 } else /* if (type == word_Quote) */ {
1107 if (text->aux == quote_Open)
1108 str = L"\x2018"; /* FIXME: configurability! */
1109 else
1110 str = L"\x2019"; /* FIXME: configurability! */
1111 }
1112
1113 (void) string_width(pdata->fonts[findex], str, &errs);
1114
1115 if (errs && text->alt)
1116 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1117 xr, shortfall, nspaces, nspace, keywords);
1118 else
1119 x = render_string(page, pdata->fonts[findex],
1120 pdata->sizes[findex], x, y, str);
1121
1122 if (*xr)
1123 (*xr)->rx = x;
1124
1125 nextword:
1126 text = text->next;
1127 }
1128
1129 return x;
1130 }
1131
1132 static void render_line(line_data *ldata, int left_x, int top_y,
1133 xref_dest *dest, keywordlist *keywords)
1134 {
1135 int nspace;
1136 xref *xr;
1137
1138 if (ldata->aux_text) {
1139 int x;
1140 xr = NULL;
1141 nspace = 0;
1142 x = render_text(ldata->page, ldata->pdata, ldata,
1143 left_x + ldata->aux_left_indent,
1144 top_y - ldata->ypos,
1145 ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1146 if (ldata->aux_text_2)
1147 render_text(ldata->page, ldata->pdata, ldata,
1148 x, top_y - ldata->ypos,
1149 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1150 }
1151 nspace = 0;
1152
1153 /*
1154 * There might be a cross-reference carried over from a
1155 * previous line.
1156 */
1157 if (dest->type != NONE) {
1158 xr = mknew(xref);
1159 xr->dest = *dest; /* structure copy */
1160 if (ldata->page->last_xref)
1161 ldata->page->last_xref->next = xr;
1162 else
1163 ldata->page->first_xref = xr;
1164 ldata->page->last_xref = xr;
1165 xr->lx = xr->rx = left_x + ldata->xpos;
1166 xr->by = top_y - ldata->ypos;
1167 xr->ty = top_y - ldata->ypos + ldata->line_height;
1168 } else
1169 xr = NULL;
1170
1171 render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1172 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1173 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1174
1175 if (xr) {
1176 /*
1177 * There's a cross-reference continued on to the next line.
1178 */
1179 *dest = xr->dest;
1180 } else
1181 dest->type = NONE;
1182 }
1183
1184 static void code_paragraph(para_data *pdata,
1185 font_data *fn, font_data *fi, font_data *fb,
1186 int font_size, int indent, word *words)
1187 {
1188 /*
1189 * For code paragraphs, I'm going to hack grievously and
1190 * pretend the three normal fonts are the three code paragraph
1191 * fonts.
1192 */
1193 pdata->fonts[FONT_NORMAL] = fb;
1194 pdata->fonts[FONT_EMPH] = fi;
1195 pdata->fonts[FONT_CODE] = fn;
1196 pdata->sizes[FONT_NORMAL] =
1197 pdata->sizes[FONT_EMPH] =
1198 pdata->sizes[FONT_CODE] = font_size;
1199
1200 pdata->first = pdata->last = NULL;
1201
1202 for (; words; words = words->next) {
1203 wchar_t *t, *e, *start;
1204 word *lhead = NULL, *ltail = NULL, *w;
1205 line_data *ldata;
1206 int prev = -1, curr;
1207
1208 t = words->text;
1209 if (words->next && words->next->type == word_Emph) {
1210 e = words->next->text;
1211 words = words->next;
1212 } else
1213 e = NULL;
1214
1215 start = t;
1216
1217 while (*start) {
1218 while (*t) {
1219 if (!e || !*e)
1220 curr = 0;
1221 else if (*e == L'i')
1222 curr = 1;
1223 else if (*e == L'b')
1224 curr = 2;
1225 else
1226 curr = 0;
1227
1228 if (prev < 0)
1229 prev = curr;
1230
1231 if (curr != prev)
1232 break;
1233
1234 t++;
1235 if (e && *e)
1236 e++;
1237 }
1238
1239 /*
1240 * We've isolated a maximal subsequence of the line
1241 * which has the same emphasis. Form it into a word
1242 * structure.
1243 */
1244 w = mknew(word);
1245 w->next = NULL;
1246 w->alt = NULL;
1247 w->type = (prev == 0 ? word_WeakCode :
1248 prev == 1 ? word_Emph : word_Normal);
1249 w->text = mknewa(wchar_t, t-start+1);
1250 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1251 w->text[t-start] = '\0';
1252 w->breaks = FALSE;
1253
1254 if (ltail)
1255 ltail->next = w;
1256 else
1257 lhead = w;
1258 ltail = w;
1259
1260 start = t;
1261 prev = -1;
1262 }
1263
1264 ldata = mknew(line_data);
1265
1266 ldata->pdata = pdata;
1267 ldata->first = lhead;
1268 ldata->end = NULL;
1269 ldata->line_height = font_size * 4096;
1270
1271 ldata->xpos = indent;
1272
1273 if (pdata->last) {
1274 pdata->last->next = ldata;
1275 ldata->prev = pdata->last;
1276 } else {
1277 pdata->first = ldata;
1278 ldata->prev = NULL;
1279 }
1280 ldata->next = NULL;
1281 pdata->last = ldata;
1282
1283 ldata->hshortfall = 0;
1284 ldata->nspaces = 0;
1285 ldata->aux_text = NULL;
1286 ldata->aux_text_2 = NULL;
1287 ldata->aux_left_indent = 0;
1288 /* General opprobrium for breaking in a code paragraph. */
1289 ldata->penalty_before = ldata->penalty_after = 50000;
1290 }
1291 }