Implemented PDF outlines.
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - set up contents section now we know what sections begin on
16 * which pages
17 *
18 * - index
19 *
20 * - header/footer? Page numbers at least would be handy. Fully
21 * configurable footer can wait, though.
22 *
23 * That should bring us to the same level of functionality that
24 * original-Halibut had, and the same in PDF plus the obvious
25 * interactive navigation features. After that, in future work:
26 *
27 * - linearised PDF, perhaps?
28 *
29 * - I'm uncertain of whether I need to include a ToUnicode CMap
30 * in each of my font definitions in PDF. Currently things (by
31 * which I mean cut and paste out of acroread) seem to be
32 * working fairly happily without it, but I don't know.
33 *
34 * - configurability
35 *
36 * - title pages
37 */
38
39 #include <assert.h>
40 #include <stdio.h>
41
42 #include "halibut.h"
43 #include "paper.h"
44
45 static font_data *make_std_font(font_list *fontlist, char const *name);
46 static void wrap_paragraph(para_data *pdata, word *words,
47 int w, int i1, int i2);
48 static page_data *page_breaks(line_data *first, line_data *last,
49 int page_height);
50 static void render_line(line_data *ldata, int left_x, int top_y,
51 xref_dest *dest, keywordlist *keywords);
52 static int paper_width_simple(para_data *pdata, word *text);
53 static void code_paragraph(para_data *pdata,
54 font_data *fn, font_data *fi, font_data *fb,
55 int font_size, int indent, word *words);
56 static void rule_paragraph(para_data *pdata, int indent, int height);
57 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
58
59 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
60 indexdata *idx) {
61 paragraph *p;
62 document *doc;
63 int indent, extra_indent, firstline_indent, aux_indent;
64 para_data *pdata;
65 line_data *ldata, *firstline, *lastline;
66 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
67 page_data *pages;
68 font_list *fontlist;
69 word *aux, *aux2;
70
71 /*
72 * FIXME: All these things ought to become configurable.
73 */
74 int paper_width = 595 * 4096;
75 int paper_height = 841 * 4096;
76 int left_margin = 72 * 4096;
77 int top_margin = 72 * 4096;
78 int right_margin = 72 * 4096;
79 int bottom_margin = 108 * 4096;
80 int indent_list_bullet = 6 * 4096;
81 int indent_list = 24 * 4096;
82 int indent_quote = 18 * 4096;
83 int base_leading = 4096;
84 int base_para_spacing = 10 * 4096;
85 int chapter_top_space = 72 * 4096;
86 int sect_num_left_space = 12 * 4096;
87 int chapter_underline_depth = 14 * 4096;
88 int chapter_underline_thickness = 3 * 4096;
89 int rule_thickness = 1 * 4096;
90
91 int base_width = paper_width - left_margin - right_margin;
92 int page_height = paper_height - top_margin - bottom_margin;
93
94 IGNORE(idx); /* FIXME */
95
96 /*
97 * First, set up some font structures.
98 */
99 fontlist = mknew(font_list);
100 fontlist->head = fontlist->tail = NULL;
101 tr = make_std_font(fontlist, "Times-Roman");
102 ti = make_std_font(fontlist, "Times-Italic");
103 hr = make_std_font(fontlist, "Helvetica-Bold");
104 hi = make_std_font(fontlist, "Helvetica-BoldOblique");
105 cr = make_std_font(fontlist, "Courier");
106 co = make_std_font(fontlist, "Courier-Oblique");
107 cb = make_std_font(fontlist, "Courier-Bold");
108
109 /*
110 * Go through and break up each paragraph into lines.
111 */
112 indent = 0;
113 firstline = lastline = NULL;
114 for (p = sourceform; p; p = p->next) {
115 p->private_data = NULL;
116
117 switch (p->type) {
118 /*
119 * These paragraph types are either invisible or don't
120 * define text in the normal sense. Either way, they
121 * don't require wrapping.
122 */
123 case para_IM:
124 case para_BR:
125 case para_Biblio:
126 case para_NotParaType:
127 case para_Config:
128 case para_VersionID:
129 case para_NoCite:
130 break;
131
132 /*
133 * These paragraph types don't require wrapping, but
134 * they do affect the line width to which we wrap the
135 * rest of the paragraphs, so we need to pay attention.
136 */
137 case para_LcontPush:
138 indent += indent_list; break;
139 case para_LcontPop:
140 indent -= indent_list; assert(indent >= 0); break;
141 case para_QuotePush:
142 indent += indent_quote; break;
143 case para_QuotePop:
144 indent -= indent_quote; assert(indent >= 0); break;
145
146 /*
147 * This paragraph type is special. Process it
148 * specially.
149 */
150 case para_Code:
151 pdata = mknew(para_data);
152 code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
153 p->private_data = pdata;
154 if (pdata->first != pdata->last) {
155 pdata->first->penalty_after += 100000;
156 pdata->last->penalty_before += 100000;
157 }
158 break;
159
160 /*
161 * This paragraph is also special.
162 */
163 case para_Rule:
164 pdata = mknew(para_data);
165 rule_paragraph(pdata, indent, rule_thickness);
166 p->private_data = pdata;
167 break;
168
169 /*
170 * All of these paragraph types require wrapping in the
171 * ordinary way. So we must supply a set of fonts, a
172 * line width and auxiliary information (e.g. bullet
173 * text) for each one.
174 */
175 case para_Chapter:
176 case para_Appendix:
177 case para_UnnumberedChapter:
178 case para_Heading:
179 case para_Subsect:
180 case para_Normal:
181 case para_BiblioCited:
182 case para_Bullet:
183 case para_NumberedList:
184 case para_DescribedThing:
185 case para_Description:
186 case para_Copyright:
187 case para_Title:
188 pdata = mknew(para_data);
189
190 /*
191 * Choose fonts for this paragraph.
192 *
193 * FIXME: All of this ought to be completely
194 * user-configurable.
195 */
196 switch (p->type) {
197 case para_Title:
198 pdata->fonts[FONT_NORMAL] = hr;
199 pdata->sizes[FONT_NORMAL] = 24;
200 pdata->fonts[FONT_EMPH] = hi;
201 pdata->sizes[FONT_EMPH] = 24;
202 pdata->fonts[FONT_CODE] = cb;
203 pdata->sizes[FONT_CODE] = 24;
204 break;
205
206 case para_Chapter:
207 case para_Appendix:
208 case para_UnnumberedChapter:
209 pdata->fonts[FONT_NORMAL] = hr;
210 pdata->sizes[FONT_NORMAL] = 20;
211 pdata->fonts[FONT_EMPH] = hi;
212 pdata->sizes[FONT_EMPH] = 20;
213 pdata->fonts[FONT_CODE] = cb;
214 pdata->sizes[FONT_CODE] = 20;
215 break;
216
217 case para_Heading:
218 case para_Subsect:
219 pdata->fonts[FONT_NORMAL] = hr;
220 pdata->fonts[FONT_EMPH] = hi;
221 pdata->fonts[FONT_CODE] = cb;
222 pdata->sizes[FONT_NORMAL] =
223 pdata->sizes[FONT_EMPH] =
224 pdata->sizes[FONT_CODE] =
225 (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
226 break;
227
228 case para_Normal:
229 case para_BiblioCited:
230 case para_Bullet:
231 case para_NumberedList:
232 case para_DescribedThing:
233 case para_Description:
234 case para_Copyright:
235 pdata->fonts[FONT_NORMAL] = tr;
236 pdata->sizes[FONT_NORMAL] = 12;
237 pdata->fonts[FONT_EMPH] = ti;
238 pdata->sizes[FONT_EMPH] = 12;
239 pdata->fonts[FONT_CODE] = cr;
240 pdata->sizes[FONT_CODE] = 12;
241 break;
242 }
243
244 /*
245 * Also select an indentation level depending on the
246 * paragraph type (list paragraphs other than
247 * para_DescribedThing need extra indent).
248 *
249 * (FIXME: Perhaps at some point we might even arrange
250 * for the user to be able to request indented first
251 * lines in paragraphs.)
252 */
253 if (p->type == para_Bullet ||
254 p->type == para_NumberedList ||
255 p->type == para_Description) {
256 extra_indent = firstline_indent = indent_list;
257 } else {
258 extra_indent = firstline_indent = 0;
259 }
260
261 /*
262 * Find the auxiliary text for this paragraph.
263 */
264 aux = aux2 = NULL;
265 aux_indent = 0;
266
267 switch (p->type) {
268 case para_Chapter:
269 case para_Appendix:
270 case para_Heading:
271 case para_Subsect:
272 /*
273 * For some heading styles (FIXME: be able to
274 * configure which), the auxiliary text contains
275 * the chapter number and is arranged to be
276 * right-aligned a few points left of the primary
277 * margin. For other styles, the auxiliary text is
278 * the full chapter _name_ and takes up space
279 * within the (wrapped) chapter title, meaning that
280 * we must move the first line indent over to make
281 * space for it.
282 */
283 if (p->type == para_Heading || p->type == para_Subsect) {
284 int len;
285
286 aux = p->kwtext2;
287 len = paper_width_simple(pdata, p->kwtext2);
288 aux_indent = -len - sect_num_left_space;
289 } else {
290 aux = p->kwtext;
291 aux2 = mknew(word);
292 aux2->next = NULL;
293 aux2->alt = NULL;
294 aux2->type = word_Normal;
295 aux2->text = ustrdup(L": ");
296 aux2->breaks = FALSE;
297 aux2->aux = 0;
298 aux_indent = 0;
299
300 firstline_indent += paper_width_simple(pdata, aux);
301 firstline_indent += paper_width_simple(pdata, aux2);
302 }
303 break;
304
305 case para_Bullet:
306 /*
307 * Auxiliary text consisting of a bullet. (FIXME:
308 * configurable bullet.)
309 */
310 aux = mknew(word);
311 aux->next = NULL;
312 aux->alt = NULL;
313 aux->type = word_Normal;
314 aux->text = ustrdup(L"\x2022");
315 aux->breaks = FALSE;
316 aux->aux = 0;
317 aux_indent = indent + indent_list_bullet;
318 break;
319
320 case para_NumberedList:
321 /*
322 * Auxiliary text consisting of the number followed
323 * by a (FIXME: configurable) full stop.
324 */
325 aux = p->kwtext;
326 aux2 = mknew(word);
327 aux2->next = NULL;
328 aux2->alt = NULL;
329 aux2->type = word_Normal;
330 aux2->text = ustrdup(L".");
331 aux2->breaks = FALSE;
332 aux2->aux = 0;
333 aux_indent = indent + indent_list_bullet;
334 break;
335
336 case para_BiblioCited:
337 /*
338 * Auxiliary text consisting of the bibliography
339 * reference text, and a trailing space.
340 */
341 aux = p->kwtext;
342 aux2 = mknew(word);
343 aux2->next = NULL;
344 aux2->alt = NULL;
345 aux2->type = word_Normal;
346 aux2->text = ustrdup(L" ");
347 aux2->breaks = FALSE;
348 aux2->aux = 0;
349 aux_indent = indent;
350 firstline_indent += paper_width_simple(pdata, aux);
351 firstline_indent += paper_width_simple(pdata, aux2);
352 break;
353 }
354
355 wrap_paragraph(pdata, p->words, base_width,
356 indent + firstline_indent,
357 indent + extra_indent);
358
359 p->private_data = pdata;
360
361 pdata->first->aux_text = aux;
362 pdata->first->aux_text_2 = aux2;
363 pdata->first->aux_left_indent = aux_indent;
364
365 /*
366 * Line breaking penalties.
367 */
368 switch (p->type) {
369 case para_Chapter:
370 case para_Appendix:
371 case para_Heading:
372 case para_Subsect:
373 case para_UnnumberedChapter:
374 /*
375 * Fixed and large penalty for breaking straight
376 * after a heading; corresponding bonus for
377 * breaking straight before.
378 */
379 pdata->first->penalty_before = -500000;
380 pdata->last->penalty_after = 500000;
381 for (ldata = pdata->first; ldata; ldata = ldata->next)
382 ldata->penalty_after = 500000;
383 break;
384
385 case para_DescribedThing:
386 /*
387 * This is treated a bit like a small heading:
388 * there's a penalty for breaking after it (i.e.
389 * between it and its description), and a bonus for
390 * breaking before it (actually _between_ list
391 * items).
392 */
393 pdata->first->penalty_before = -200000;
394 pdata->last->penalty_after = 200000;
395 break;
396
397 default:
398 /*
399 * Most paragraph types: widow/orphan control by
400 * discouraging breaking one line from the end of
401 * any paragraph.
402 */
403 if (pdata->first != pdata->last) {
404 pdata->first->penalty_after = 100000;
405 pdata->last->penalty_before = 100000;
406 }
407 break;
408 }
409
410 break;
411 }
412
413 if (p->private_data) {
414 pdata = (para_data *)p->private_data;
415
416 /*
417 * Set the line spacing for each line in this paragraph.
418 */
419 for (ldata = pdata->first; ldata; ldata = ldata->next) {
420 if (ldata == pdata->first)
421 ldata->space_before = base_para_spacing / 2;
422 else
423 ldata->space_before = base_leading / 2;
424 if (ldata == pdata->last)
425 ldata->space_after = base_para_spacing / 2;
426 else
427 ldata->space_after = base_leading / 2;
428 ldata->page_break = FALSE;
429 }
430
431 /*
432 * Some kinds of section heading do require a page
433 * break before them.
434 */
435 if (p->type == para_Title ||
436 p->type == para_Chapter ||
437 p->type == para_Appendix ||
438 p->type == para_UnnumberedChapter) {
439 pdata->first->page_break = TRUE;
440 pdata->first->space_before = chapter_top_space;
441 pdata->last->space_after +=
442 chapter_underline_depth + chapter_underline_thickness;
443 }
444
445 /*
446 * Link all line structures together into a big list.
447 */
448 if (pdata->first) {
449 if (lastline) {
450 lastline->next = pdata->first;
451 pdata->first->prev = lastline;
452 } else {
453 firstline = pdata->first;
454 pdata->first->prev = NULL;
455 }
456 lastline = pdata->last;
457 }
458 }
459 }
460
461 /*
462 * Now we have an enormous linked list of every line of text in
463 * the document. Break it up into pages.
464 */
465 pages = page_breaks(firstline, lastline, page_height);
466
467 /*
468 * Now we're ready to actually lay out the pages. We do this by
469 * looping over _paragraphs_, since we may need to track cross-
470 * references between lines and even across pages.
471 */
472 for (p = sourceform; p; p = p->next) {
473 pdata = (para_data *)p->private_data;
474
475 if (pdata) {
476 xref_dest dest;
477 dest.type = NONE;
478 for (ldata = pdata->first; ldata; ldata = ldata->next) {
479 render_line(ldata, left_margin, paper_height - top_margin,
480 &dest, keywords);
481 if (ldata == pdata->last)
482 break;
483 }
484
485 /*
486 * Some section headings (FIXME: should be configurable
487 * which) want to be underlined.
488 */
489 if (p->type == para_Chapter || p->type == para_Appendix ||
490 p->type == para_UnnumberedChapter || p->type == para_Title) {
491 add_rect_to_page(pdata->last->page,
492 left_margin,
493 (paper_height - top_margin -
494 pdata->last->ypos - chapter_underline_depth),
495 base_width,
496 chapter_underline_thickness);
497 }
498
499 /*
500 * Rule paragraphs need to contain an actual rule!
501 */
502 if (p->type == para_Rule) {
503 add_rect_to_page(pdata->first->page,
504 left_margin + pdata->first->xpos,
505 (paper_height - top_margin -
506 pdata->last->ypos -
507 pdata->last->line_height),
508 base_width - pdata->first->xpos,
509 pdata->last->line_height);
510 }
511 }
512 }
513
514 /*
515 * Start putting together the overall document structure we're
516 * going to return.
517 */
518 doc = mknew(document);
519 doc->fonts = fontlist;
520 doc->pages = pages;
521 doc->paper_width = paper_width;
522 doc->paper_height = paper_height;
523
524 /*
525 * Collect the section heading paragraphs into a document
526 * outline. This is slightly fiddly because the Title paragraph
527 * isn't required to be at the start, although all the others
528 * must be in order.
529 */
530 {
531 int osize = 20;
532
533 doc->outline_elements = mknewa(outline_element, osize);
534 doc->n_outline_elements = 0;
535
536 /* First find the title. */
537 for (p = sourceform; p; p = p->next) {
538 switch (p->type) {
539 case para_Title:
540 doc->outline_elements[0].level = 0;
541 doc->outline_elements[0].para = p;
542 doc->n_outline_elements++;
543 break;
544 }
545 }
546
547 /* Then collect the rest. */
548 for (p = sourceform; p; p = p->next) {
549 switch (p->type) {
550 case para_Chapter:
551 case para_UnnumberedChapter:
552 case para_Appendix:
553 case para_Heading:
554 case para_Subsect:
555
556 if (doc->n_outline_elements >= osize) {
557 osize += 20;
558 doc->outline_elements =
559 resize(doc->outline_elements, osize);
560 }
561
562 if (p->type == para_Heading) {
563 doc->outline_elements[doc->n_outline_elements].level = 2;
564 } else if (p->type == para_Subsect) {
565 doc->outline_elements[doc->n_outline_elements].level =
566 3 + p->aux;
567 } else
568 doc->outline_elements[doc->n_outline_elements].level = 1;
569
570 doc->outline_elements[doc->n_outline_elements].para = p;
571 doc->n_outline_elements++;
572 break;
573 }
574 }
575 }
576
577 return doc;
578 }
579
580 static font_encoding *new_font_encoding(font_data *font)
581 {
582 font_encoding *fe;
583 int i;
584
585 fe = mknew(font_encoding);
586 fe->next = NULL;
587
588 if (font->list->tail)
589 font->list->tail->next = fe;
590 else
591 font->list->head = fe;
592 font->list->tail = fe;
593
594 fe->font = font;
595 fe->free_pos = 0x21;
596
597 for (i = 0; i < 256; i++) {
598 fe->vector[i] = NULL;
599 fe->indices[i] = -1;
600 fe->to_unicode[i] = 0xFFFF;
601 }
602
603 return fe;
604 }
605
606 static font_data *make_std_font(font_list *fontlist, char const *name)
607 {
608 const int *widths;
609 int nglyphs;
610 font_data *f;
611 font_encoding *fe;
612 int i;
613
614 widths = ps_std_font_widths(name);
615 if (!widths)
616 return NULL;
617
618 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
619
620 f = mknew(font_data);
621
622 f->list = fontlist;
623 f->name = name;
624 f->nglyphs = nglyphs;
625 f->glyphs = ps_std_glyphs;
626 f->widths = widths;
627 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
628
629 /*
630 * Our first subfont will contain all of US-ASCII. This isn't
631 * really necessary - we could just create custom subfonts
632 * precisely as the whim of render_string dictated - but
633 * instinct suggests that it might be nice to have the text in
634 * the output files look _marginally_ recognisable.
635 */
636 fe = new_font_encoding(f);
637 fe->free_pos = 0xA1; /* only the top half is free */
638 f->latest_subfont = fe;
639
640 for (i = 0; i < (int)lenof(f->bmp); i++)
641 f->bmp[i] = 0xFFFF;
642
643 for (i = 0; i < nglyphs; i++) {
644 wchar_t ucs;
645 ucs = ps_glyph_to_unicode(f->glyphs[i]);
646 assert(ucs != 0xFFFF);
647 f->bmp[ucs] = i;
648 if (ucs >= 0x20 && ucs <= 0x7E) {
649 fe->vector[ucs] = f->glyphs[i];
650 fe->indices[ucs] = i;
651 fe->to_unicode[ucs] = ucs;
652 f->subfont_map[i].subfont = fe;
653 f->subfont_map[i].position = ucs;
654 } else {
655 /*
656 * This character is not yet assigned to a subfont.
657 */
658 f->subfont_map[i].subfont = NULL;
659 f->subfont_map[i].position = 0;
660 }
661 }
662
663 return f;
664 }
665
666 static int string_width(font_data *font, wchar_t const *string, int *errs)
667 {
668 int width = 0;
669
670 if (errs)
671 *errs = 0;
672
673 for (; *string; string++) {
674 int index;
675
676 index = font->bmp[(unsigned short)*string];
677 if (index == 0xFFFF) {
678 if (errs)
679 *errs = 1;
680 } else {
681 width += font->widths[index];
682 }
683 }
684
685 return width;
686 }
687
688 static int paper_width_internal(void *vctx, word *word, int *nspaces);
689
690 struct paper_width_ctx {
691 int minspacewidth;
692 para_data *pdata;
693 };
694
695 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
696 int w = 0;
697 while (text && text != end) {
698 w += paper_width_internal(vctx, text, nspaces);
699 text = text->next;
700 }
701 return w;
702 }
703
704 static int paper_width_internal(void *vctx, word *word, int *nspaces)
705 {
706 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
707 int style, type, findex, width, errs;
708 wchar_t *str;
709
710 switch (word->type) {
711 case word_HyperLink:
712 case word_HyperEnd:
713 case word_UpperXref:
714 case word_LowerXref:
715 case word_XrefEnd:
716 case word_IndexRef:
717 return 0;
718 }
719
720 style = towordstyle(word->type);
721 type = removeattr(word->type);
722
723 findex = (style == word_Normal ? FONT_NORMAL :
724 style == word_Emph ? FONT_EMPH :
725 FONT_CODE);
726
727 if (type == word_Normal) {
728 str = word->text;
729 } else if (type == word_WhiteSpace) {
730 if (findex != FONT_CODE) {
731 if (nspaces)
732 (*nspaces)++;
733 return ctx->minspacewidth;
734 } else
735 str = L" ";
736 } else /* if (type == word_Quote) */ {
737 if (word->aux == quote_Open)
738 str = L"\x2018"; /* FIXME: configurability! */
739 else
740 str = L"\x2019"; /* FIXME: configurability! */
741 }
742
743 width = string_width(ctx->pdata->fonts[findex], str, &errs);
744
745 if (errs && word->alt)
746 return paper_width_list(vctx, word->alt, NULL, nspaces);
747 else
748 return ctx->pdata->sizes[findex] * width;
749 }
750
751 static int paper_width(void *vctx, word *word)
752 {
753 return paper_width_internal(vctx, word, NULL);
754 }
755
756 static int paper_width_simple(para_data *pdata, word *text)
757 {
758 struct paper_width_ctx ctx;
759
760 ctx.pdata = pdata;
761 ctx.minspacewidth =
762 (pdata->sizes[FONT_NORMAL] *
763 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
764
765 return paper_width_list(&ctx, text, NULL, NULL);
766 }
767
768 static void wrap_paragraph(para_data *pdata, word *words,
769 int w, int i1, int i2)
770 {
771 wrappedline *wrapping, *p;
772 int spacewidth;
773 struct paper_width_ctx ctx;
774 int line_height;
775
776 /*
777 * We're going to need to store the line height in every line
778 * structure we generate.
779 */
780 {
781 int i;
782 line_height = 0;
783 for (i = 0; i < NFONTS; i++)
784 if (line_height < pdata->sizes[i])
785 line_height = pdata->sizes[i];
786 line_height *= 4096;
787 }
788
789 spacewidth = (pdata->sizes[FONT_NORMAL] *
790 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
791 if (spacewidth == 0) {
792 /*
793 * A font without a space?! Disturbing. I hope this never
794 * comes up, but I'll make a random guess anyway and set my
795 * space width to half the point size.
796 */
797 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
798 }
799
800 /*
801 * I'm going to set the _minimum_ space width to 3/5 of the
802 * standard one, and use the standard one as the optimum.
803 */
804 ctx.minspacewidth = spacewidth * 3 / 5;
805 ctx.pdata = pdata;
806
807 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
808
809 /*
810 * Having done the wrapping, we now concoct a set of line_data
811 * structures.
812 */
813 pdata->first = pdata->last = NULL;
814
815 for (p = wrapping; p; p = p->next) {
816 line_data *ldata;
817 word *wd;
818 int len, wid, spaces;
819
820 ldata = mknew(line_data);
821
822 ldata->pdata = pdata;
823 ldata->first = p->begin;
824 ldata->end = p->end;
825 ldata->line_height = line_height;
826
827 ldata->xpos = (p == wrapping ? i1 : i2);
828
829 if (pdata->last) {
830 pdata->last->next = ldata;
831 ldata->prev = pdata->last;
832 } else {
833 pdata->first = ldata;
834 ldata->prev = NULL;
835 }
836 ldata->next = NULL;
837 pdata->last = ldata;
838
839 spaces = 0;
840 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
841 wid = (p == wrapping ? w - i1 : w - i2);
842 wd = ldata->first;
843
844 ldata->hshortfall = wid - len;
845 ldata->nspaces = spaces;
846 /*
847 * This tells us how much the space width needs to
848 * change from _min_spacewidth. But we want to store
849 * its difference from the _natural_ space width, to
850 * make the text rendering easier.
851 */
852 ldata->hshortfall += ctx.minspacewidth * spaces;
853 ldata->hshortfall -= spacewidth * spaces;
854 /*
855 * Special case: on the last line of a paragraph, we
856 * never stretch spaces.
857 */
858 if (ldata->hshortfall > 0 && !p->next)
859 ldata->hshortfall = 0;
860
861 ldata->aux_text = NULL;
862 ldata->aux_text_2 = NULL;
863 ldata->aux_left_indent = 0;
864 ldata->penalty_before = ldata->penalty_after = 0;
865 }
866
867 }
868
869 static page_data *page_breaks(line_data *first, line_data *last,
870 int page_height)
871 {
872 line_data *l, *m;
873 page_data *ph, *pt;
874
875 /*
876 * Page breaking is done by a close analogue of the optimal
877 * paragraph wrapping algorithm used by wrap_para(). We work
878 * backwards from the end of the document line by line; for
879 * each line, we contemplate every possible number of lines we
880 * could put on a page starting with that line, determine a
881 * cost function for each one, add it to the pre-computed cost
882 * function for optimally page-breaking everything after that
883 * page, and pick the best option.
884 *
885 * Since my line_data structures are only used for this
886 * purpose, I might as well just store the algorithm data
887 * directly in them.
888 */
889
890 for (l = last; l; l = l->prev) {
891 int minheight, text = 0, space = 0;
892 int cost;
893
894 l->bestcost = -1;
895 for (m = l; m; m = m->next) {
896 if (m != l && m->page_break)
897 break; /* we've gone as far as we can */
898
899 if (m != l)
900 space += m->prev->space_after;
901 if (m != l || m->page_break)
902 space += m->space_before;
903 text += m->line_height;
904 minheight = text + space;
905
906 if (m != l && minheight > page_height)
907 break;
908
909 /*
910 * Compute the cost of this arrangement, as the square
911 * of the amount of wasted space on the page.
912 * Exception: if this is the last page before a
913 * mandatory break or the document end, we don't
914 * penalise a large blank area.
915 */
916 if (m->next && !m->next->page_break)
917 {
918 int x = page_height - minheight;
919 int xf;
920
921 xf = x & 0xFF;
922 x >>= 8;
923
924 cost = x*x;
925 cost += (x * xf) >> 8;
926 } else
927 cost = 0;
928
929 if (m->next && !m->next->page_break) {
930 cost += m->penalty_after;
931 cost += m->next->penalty_before;
932 }
933
934 if (m->next && !m->next->page_break)
935 cost += m->next->bestcost;
936 if (l->bestcost == -1 || l->bestcost > cost) {
937 /*
938 * This is the best option yet for this starting
939 * point.
940 */
941 l->bestcost = cost;
942 if (m->next && !m->next->page_break)
943 l->vshortfall = page_height - minheight;
944 else
945 l->vshortfall = 0;
946 l->text = text;
947 l->space = space;
948 l->page_last = m;
949 }
950 }
951 }
952
953 /*
954 * Now go through the line list forwards and assemble the
955 * actual pages.
956 */
957 ph = pt = NULL;
958
959 l = first;
960 while (l) {
961 page_data *page;
962 int text, space;
963
964 page = mknew(page_data);
965 page->next = NULL;
966 page->prev = pt;
967 if (pt)
968 pt->next = page;
969 else
970 ph = page;
971 pt = page;
972
973 page->first_line = l;
974 page->last_line = l->page_last;
975
976 page->first_text = page->last_text = NULL;
977 page->first_xref = page->last_xref = NULL;
978 page->first_rect = page->last_rect = NULL;
979
980 /*
981 * Now assign a y-coordinate to each line on the page.
982 */
983 text = space = 0;
984 for (l = page->first_line; l; l = l->next) {
985 if (l != page->first_line)
986 space += l->prev->space_after;
987 if (l != page->first_line || l->page_break)
988 space += l->space_before;
989 text += l->line_height;
990
991 l->page = page;
992 l->ypos = text + space +
993 space * (float)page->first_line->vshortfall /
994 page->first_line->space;
995
996 if (l == page->last_line)
997 break;
998 }
999
1000 l = page->last_line->next;
1001 }
1002
1003 return ph;
1004 }
1005
1006 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
1007 {
1008 rect *r = mknew(rect);
1009
1010 r->next = NULL;
1011 if (page->last_rect)
1012 page->last_rect->next = r;
1013 else
1014 page->first_rect = r;
1015 page->last_rect = r;
1016
1017 r->x = x;
1018 r->y = y;
1019 r->w = w;
1020 r->h = h;
1021 }
1022
1023 static void add_string_to_page(page_data *page, int x, int y,
1024 font_encoding *fe, int size, char *text)
1025 {
1026 text_fragment *frag;
1027
1028 frag = mknew(text_fragment);
1029 frag->next = NULL;
1030
1031 if (page->last_text)
1032 page->last_text->next = frag;
1033 else
1034 page->first_text = frag;
1035 page->last_text = frag;
1036
1037 frag->x = x;
1038 frag->y = y;
1039 frag->fe = fe;
1040 frag->fontsize = size;
1041 frag->text = dupstr(text);
1042 }
1043
1044 /*
1045 * Returns the updated x coordinate.
1046 */
1047 static int render_string(page_data *page, font_data *font, int fontsize,
1048 int x, int y, wchar_t *str)
1049 {
1050 char *text;
1051 int textpos, textwid, glyph;
1052 font_encoding *subfont = NULL, *sf;
1053
1054 text = mknewa(char, 1 + ustrlen(str));
1055 textpos = textwid = 0;
1056
1057 while (*str) {
1058 glyph = font->bmp[*str];
1059
1060 if (glyph == 0xFFFF)
1061 continue; /* nothing more we can do here */
1062
1063 /*
1064 * Find which subfont this character is going in.
1065 */
1066 sf = font->subfont_map[glyph].subfont;
1067
1068 if (!sf) {
1069 int c;
1070
1071 /*
1072 * This character is not yet in a subfont. Assign one.
1073 */
1074 if (font->latest_subfont->free_pos >= 0x100)
1075 font->latest_subfont = new_font_encoding(font);
1076
1077 c = font->latest_subfont->free_pos++;
1078 if (font->latest_subfont->free_pos == 0x7F)
1079 font->latest_subfont->free_pos = 0xA1;
1080
1081 font->subfont_map[glyph].subfont = font->latest_subfont;
1082 font->subfont_map[glyph].position = c;
1083 font->latest_subfont->vector[c] = font->glyphs[glyph];
1084 font->latest_subfont->indices[c] = glyph;
1085 font->latest_subfont->to_unicode[c] = *str;
1086
1087 sf = font->latest_subfont;
1088 }
1089
1090 if (!subfont || sf != subfont) {
1091 if (subfont) {
1092 text[textpos] = '\0';
1093 add_string_to_page(page, x, y, subfont, fontsize, text);
1094 x += textwid;
1095 } else {
1096 assert(textpos == 0);
1097 }
1098 textpos = 0;
1099 subfont = sf;
1100 }
1101
1102 text[textpos++] = font->subfont_map[glyph].position;
1103 textwid += font->widths[glyph] * fontsize;
1104
1105 str++;
1106 }
1107
1108 if (textpos > 0) {
1109 text[textpos] = '\0';
1110 add_string_to_page(page, x, y, subfont, fontsize, text);
1111 x += textwid;
1112 }
1113
1114 return x;
1115 }
1116
1117 /*
1118 * Returns the updated x coordinate.
1119 */
1120 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
1121 int x, int y, word *text, word *text_end, xref **xr,
1122 int shortfall, int nspaces, int *nspace,
1123 keywordlist *keywords)
1124 {
1125 while (text && text != text_end) {
1126 int style, type, findex, errs;
1127 wchar_t *str;
1128 xref_dest dest;
1129
1130 switch (text->type) {
1131 /*
1132 * Start a cross-reference.
1133 */
1134 case word_HyperLink:
1135 case word_UpperXref:
1136 case word_LowerXref:
1137
1138 if (text->type == word_HyperLink) {
1139 dest.type = URL;
1140 dest.url = utoa_dup(text->text);
1141 dest.page = NULL;
1142 } else {
1143 keyword *kwl = kw_lookup(keywords, text->text);
1144 para_data *pdata;
1145
1146 if (kwl) {
1147 assert(kwl->para->private_data);
1148 pdata = (para_data *) kwl->para->private_data;
1149 dest.type = PAGE;
1150 dest.page = pdata->first->page;
1151 dest.url = NULL;
1152 } else {
1153 /*
1154 * Shouldn't happen, but *shrug*
1155 */
1156 dest.type = NONE;
1157 dest.page = NULL;
1158 dest.url = NULL;
1159 }
1160 }
1161 if (dest.type != NONE) {
1162 *xr = mknew(xref);
1163 (*xr)->dest = dest; /* structure copy */
1164 if (page->last_xref)
1165 page->last_xref->next = *xr;
1166 else
1167 page->first_xref = *xr;
1168 page->last_xref = *xr;
1169 (*xr)->next = NULL;
1170
1171 /*
1172 * FIXME: Ideally we should have, and use, some
1173 * vertical font metric information here so that
1174 * our cross-ref rectangle can take account of
1175 * descenders and the font's cap height. This will
1176 * do for the moment, but it isn't ideal.
1177 */
1178 (*xr)->lx = (*xr)->rx = x;
1179 (*xr)->by = y;
1180 (*xr)->ty = y + ldata->line_height;
1181 }
1182 goto nextword;
1183
1184 /*
1185 * Finish extending a cross-reference box.
1186 */
1187 case word_HyperEnd:
1188 case word_XrefEnd:
1189 *xr = NULL;
1190 goto nextword;
1191
1192 case word_IndexRef:
1193 goto nextword;
1194 /*
1195 * FIXME: we should do something with this.
1196 */
1197 }
1198
1199 style = towordstyle(text->type);
1200 type = removeattr(text->type);
1201
1202 findex = (style == word_Normal ? FONT_NORMAL :
1203 style == word_Emph ? FONT_EMPH :
1204 FONT_CODE);
1205
1206 if (type == word_Normal) {
1207 str = text->text;
1208 } else if (type == word_WhiteSpace) {
1209 x += pdata->sizes[findex] *
1210 string_width(pdata->fonts[findex], L" ", NULL);
1211 if (nspaces && findex != FONT_CODE) {
1212 x += (*nspace+1) * shortfall / nspaces;
1213 x -= *nspace * shortfall / nspaces;
1214 (*nspace)++;
1215 }
1216 goto nextword;
1217 } else /* if (type == word_Quote) */ {
1218 if (text->aux == quote_Open)
1219 str = L"\x2018"; /* FIXME: configurability! */
1220 else
1221 str = L"\x2019"; /* FIXME: configurability! */
1222 }
1223
1224 (void) string_width(pdata->fonts[findex], str, &errs);
1225
1226 if (errs && text->alt)
1227 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1228 xr, shortfall, nspaces, nspace, keywords);
1229 else
1230 x = render_string(page, pdata->fonts[findex],
1231 pdata->sizes[findex], x, y, str);
1232
1233 if (*xr)
1234 (*xr)->rx = x;
1235
1236 nextword:
1237 text = text->next;
1238 }
1239
1240 return x;
1241 }
1242
1243 static void render_line(line_data *ldata, int left_x, int top_y,
1244 xref_dest *dest, keywordlist *keywords)
1245 {
1246 int nspace;
1247 xref *xr;
1248
1249 if (ldata->aux_text) {
1250 int x;
1251 xr = NULL;
1252 nspace = 0;
1253 x = render_text(ldata->page, ldata->pdata, ldata,
1254 left_x + ldata->aux_left_indent,
1255 top_y - ldata->ypos,
1256 ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1257 if (ldata->aux_text_2)
1258 render_text(ldata->page, ldata->pdata, ldata,
1259 x, top_y - ldata->ypos,
1260 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1261 }
1262 nspace = 0;
1263
1264 if (ldata->first) {
1265 /*
1266 * There might be a cross-reference carried over from a
1267 * previous line.
1268 */
1269 if (dest->type != NONE) {
1270 xr = mknew(xref);
1271 xr->next = NULL;
1272 xr->dest = *dest; /* structure copy */
1273 if (ldata->page->last_xref)
1274 ldata->page->last_xref->next = xr;
1275 else
1276 ldata->page->first_xref = xr;
1277 ldata->page->last_xref = xr;
1278 xr->lx = xr->rx = left_x + ldata->xpos;
1279 xr->by = top_y - ldata->ypos;
1280 xr->ty = top_y - ldata->ypos + ldata->line_height;
1281 } else
1282 xr = NULL;
1283
1284 render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1285 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1286 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1287
1288 if (xr) {
1289 /*
1290 * There's a cross-reference continued on to the next line.
1291 */
1292 *dest = xr->dest;
1293 } else
1294 dest->type = NONE;
1295 }
1296 }
1297
1298 static void code_paragraph(para_data *pdata,
1299 font_data *fn, font_data *fi, font_data *fb,
1300 int font_size, int indent, word *words)
1301 {
1302 /*
1303 * For code paragraphs, I'm going to hack grievously and
1304 * pretend the three normal fonts are the three code paragraph
1305 * fonts.
1306 */
1307 pdata->fonts[FONT_NORMAL] = fb;
1308 pdata->fonts[FONT_EMPH] = fi;
1309 pdata->fonts[FONT_CODE] = fn;
1310 pdata->sizes[FONT_NORMAL] =
1311 pdata->sizes[FONT_EMPH] =
1312 pdata->sizes[FONT_CODE] = font_size;
1313
1314 pdata->first = pdata->last = NULL;
1315
1316 for (; words; words = words->next) {
1317 wchar_t *t, *e, *start;
1318 word *lhead = NULL, *ltail = NULL, *w;
1319 line_data *ldata;
1320 int prev = -1, curr;
1321
1322 t = words->text;
1323 if (words->next && words->next->type == word_Emph) {
1324 e = words->next->text;
1325 words = words->next;
1326 } else
1327 e = NULL;
1328
1329 start = t;
1330
1331 while (*start) {
1332 while (*t) {
1333 if (!e || !*e)
1334 curr = 0;
1335 else if (*e == L'i')
1336 curr = 1;
1337 else if (*e == L'b')
1338 curr = 2;
1339 else
1340 curr = 0;
1341
1342 if (prev < 0)
1343 prev = curr;
1344
1345 if (curr != prev)
1346 break;
1347
1348 t++;
1349 if (e && *e)
1350 e++;
1351 }
1352
1353 /*
1354 * We've isolated a maximal subsequence of the line
1355 * which has the same emphasis. Form it into a word
1356 * structure.
1357 */
1358 w = mknew(word);
1359 w->next = NULL;
1360 w->alt = NULL;
1361 w->type = (prev == 0 ? word_WeakCode :
1362 prev == 1 ? word_Emph : word_Normal);
1363 w->text = mknewa(wchar_t, t-start+1);
1364 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1365 w->text[t-start] = '\0';
1366 w->breaks = FALSE;
1367
1368 if (ltail)
1369 ltail->next = w;
1370 else
1371 lhead = w;
1372 ltail = w;
1373
1374 start = t;
1375 prev = -1;
1376 }
1377
1378 ldata = mknew(line_data);
1379
1380 ldata->pdata = pdata;
1381 ldata->first = lhead;
1382 ldata->end = NULL;
1383 ldata->line_height = font_size * 4096;
1384
1385 ldata->xpos = indent;
1386
1387 if (pdata->last) {
1388 pdata->last->next = ldata;
1389 ldata->prev = pdata->last;
1390 } else {
1391 pdata->first = ldata;
1392 ldata->prev = NULL;
1393 }
1394 ldata->next = NULL;
1395 pdata->last = ldata;
1396
1397 ldata->hshortfall = 0;
1398 ldata->nspaces = 0;
1399 ldata->aux_text = NULL;
1400 ldata->aux_text_2 = NULL;
1401 ldata->aux_left_indent = 0;
1402 /* General opprobrium for breaking in a code paragraph. */
1403 ldata->penalty_before = ldata->penalty_after = 50000;
1404 }
1405 }
1406
1407 static void rule_paragraph(para_data *pdata, int indent, int height)
1408 {
1409 line_data *ldata;
1410
1411 ldata = mknew(line_data);
1412
1413 ldata->pdata = pdata;
1414 ldata->first = NULL;
1415 ldata->end = NULL;
1416 ldata->line_height = height;
1417
1418 ldata->xpos = indent;
1419
1420 ldata->prev = NULL;
1421 ldata->next = NULL;
1422
1423 ldata->hshortfall = 0;
1424 ldata->nspaces = 0;
1425 ldata->aux_text = NULL;
1426 ldata->aux_text_2 = NULL;
1427 ldata->aux_left_indent = 0;
1428
1429 /*
1430 * Better to break after a rule than before it
1431 */
1432 ldata->penalty_after += 100000;
1433 ldata->penalty_before += -100000;
1434
1435 pdata->first = pdata->last = ldata;
1436 }