Implemented lines under chapter titles.
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - implement para_Rule
16 *
17 * - set up contents section now we know what sections begin on
18 * which pages
19 *
20 * - do PDF outline
21 *
22 * - index
23 *
24 * - header/footer? Page numbers at least would be handy. Fully
25 * configurable footer can wait, though.
26 *
27 * That should bring us to the same level of functionality that
28 * original-Halibut had, and the same in PDF plus the obvious
29 * interactive navigation features. After that, in future work:
30 *
31 * - linearised PDF, perhaps?
32 *
33 * - I'm uncertain of whether I need to include a ToUnicode CMap
34 * in each of my font definitions in PDF. Currently things (by
35 * which I mean cut and paste out of acroread) seem to be
36 * working fairly happily without it, but I don't know.
37 *
38 * - configurability
39 *
40 * - title pages
41 */
42
43 #include <assert.h>
44 #include <stdio.h>
45
46 #include "halibut.h"
47 #include "paper.h"
48
49 static font_data *make_std_font(font_list *fontlist, char const *name);
50 static void wrap_paragraph(para_data *pdata, word *words,
51 int w, int i1, int i2);
52 static page_data *page_breaks(line_data *first, line_data *last,
53 int page_height);
54 static void render_line(line_data *ldata, int left_x, int top_y,
55 xref_dest *dest, keywordlist *keywords);
56 static int paper_width_simple(para_data *pdata, word *text);
57 static void code_paragraph(para_data *pdata,
58 font_data *fn, font_data *fi, font_data *fb,
59 int font_size, int indent, word *words);
60 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
61
62 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
63 indexdata *idx) {
64 paragraph *p;
65 document *doc;
66 int indent, extra_indent, firstline_indent, aux_indent;
67 para_data *pdata;
68 line_data *ldata, *firstline, *lastline;
69 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
70 page_data *pages;
71 font_list *fontlist;
72 word *aux, *aux2;
73
74 /*
75 * FIXME: All these things ought to become configurable.
76 */
77 int paper_width = 595 * 4096;
78 int paper_height = 841 * 4096;
79 int left_margin = 72 * 4096;
80 int top_margin = 72 * 4096;
81 int right_margin = 72 * 4096;
82 int bottom_margin = 108 * 4096;
83 int indent_list_bullet = 6 * 4096;
84 int indent_list = 24 * 4096;
85 int indent_quote = 18 * 4096;
86 int base_leading = 4096;
87 int base_para_spacing = 10 * 4096;
88 int chapter_top_space = 72 * 4096;
89 int sect_num_left_space = 12 * 4096;
90 int chapter_underline_depth = 14 * 4096;
91 int chapter_underline_thickness = 3 * 4096;
92
93 int base_width = paper_width - left_margin - right_margin;
94 int page_height = paper_height - top_margin - bottom_margin;
95
96 IGNORE(idx); /* FIXME */
97
98 /*
99 * First, set up some font structures.
100 */
101 fontlist = mknew(font_list);
102 fontlist->head = fontlist->tail = NULL;
103 tr = make_std_font(fontlist, "Times-Roman");
104 ti = make_std_font(fontlist, "Times-Italic");
105 hr = make_std_font(fontlist, "Helvetica-Bold");
106 hi = make_std_font(fontlist, "Helvetica-BoldOblique");
107 cr = make_std_font(fontlist, "Courier");
108 co = make_std_font(fontlist, "Courier-Oblique");
109 cb = make_std_font(fontlist, "Courier-Bold");
110
111 /*
112 * Go through and break up each paragraph into lines.
113 */
114 indent = 0;
115 firstline = lastline = NULL;
116 for (p = sourceform; p; p = p->next) {
117 p->private_data = NULL;
118
119 switch (p->type) {
120 /*
121 * These paragraph types are either invisible or don't
122 * define text in the normal sense. Either way, they
123 * don't require wrapping.
124 */
125 case para_IM:
126 case para_BR:
127 case para_Biblio:
128 case para_NotParaType:
129 case para_Config:
130 case para_VersionID:
131 case para_NoCite:
132 break;
133
134 /*
135 * These paragraph types don't require wrapping, but
136 * they do affect the line width to which we wrap the
137 * rest of the paragraphs, so we need to pay attention.
138 */
139 case para_LcontPush:
140 indent += indent_list; break;
141 case para_LcontPop:
142 indent -= indent_list; assert(indent >= 0); break;
143 case para_QuotePush:
144 indent += indent_quote; break;
145 case para_QuotePop:
146 indent -= indent_quote; assert(indent >= 0); break;
147
148 /*
149 * This paragraph type is special. Process it
150 * specially.
151 */
152 case para_Code:
153 pdata = mknew(para_data);
154 code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
155 p->private_data = pdata;
156 if (pdata->first != pdata->last) {
157 pdata->first->penalty_after += 100000;
158 pdata->last->penalty_before += 100000;
159 }
160 break;
161
162 /*
163 * All of these paragraph types require wrapping in the
164 * ordinary way. So we must supply a set of fonts, a
165 * line width and auxiliary information (e.g. bullet
166 * text) for each one.
167 */
168 case para_Chapter:
169 case para_Appendix:
170 case para_UnnumberedChapter:
171 case para_Heading:
172 case para_Subsect:
173 case para_Normal:
174 case para_BiblioCited:
175 case para_Bullet:
176 case para_NumberedList:
177 case para_DescribedThing:
178 case para_Description:
179 case para_Copyright:
180 case para_Title:
181 pdata = mknew(para_data);
182
183 /*
184 * Choose fonts for this paragraph.
185 *
186 * FIXME: All of this ought to be completely
187 * user-configurable.
188 */
189 switch (p->type) {
190 case para_Title:
191 pdata->fonts[FONT_NORMAL] = hr;
192 pdata->sizes[FONT_NORMAL] = 24;
193 pdata->fonts[FONT_EMPH] = hi;
194 pdata->sizes[FONT_EMPH] = 24;
195 pdata->fonts[FONT_CODE] = cb;
196 pdata->sizes[FONT_CODE] = 24;
197 break;
198
199 case para_Chapter:
200 case para_Appendix:
201 case para_UnnumberedChapter:
202 pdata->fonts[FONT_NORMAL] = hr;
203 pdata->sizes[FONT_NORMAL] = 20;
204 pdata->fonts[FONT_EMPH] = hi;
205 pdata->sizes[FONT_EMPH] = 20;
206 pdata->fonts[FONT_CODE] = cb;
207 pdata->sizes[FONT_CODE] = 20;
208 break;
209
210 case para_Heading:
211 case para_Subsect:
212 pdata->fonts[FONT_NORMAL] = hr;
213 pdata->fonts[FONT_EMPH] = hi;
214 pdata->fonts[FONT_CODE] = cb;
215 pdata->sizes[FONT_NORMAL] =
216 pdata->sizes[FONT_EMPH] =
217 pdata->sizes[FONT_CODE] =
218 (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
219 break;
220
221 case para_Normal:
222 case para_BiblioCited:
223 case para_Bullet:
224 case para_NumberedList:
225 case para_DescribedThing:
226 case para_Description:
227 case para_Copyright:
228 pdata->fonts[FONT_NORMAL] = tr;
229 pdata->sizes[FONT_NORMAL] = 12;
230 pdata->fonts[FONT_EMPH] = ti;
231 pdata->sizes[FONT_EMPH] = 12;
232 pdata->fonts[FONT_CODE] = cr;
233 pdata->sizes[FONT_CODE] = 12;
234 break;
235 }
236
237 /*
238 * Also select an indentation level depending on the
239 * paragraph type (list paragraphs other than
240 * para_DescribedThing need extra indent).
241 *
242 * (FIXME: Perhaps at some point we might even arrange
243 * for the user to be able to request indented first
244 * lines in paragraphs.)
245 */
246 if (p->type == para_Bullet ||
247 p->type == para_NumberedList ||
248 p->type == para_Description) {
249 extra_indent = firstline_indent = indent_list;
250 } else {
251 extra_indent = firstline_indent = 0;
252 }
253
254 /*
255 * Find the auxiliary text for this paragraph.
256 */
257 aux = aux2 = NULL;
258 aux_indent = 0;
259
260 switch (p->type) {
261 case para_Chapter:
262 case para_Appendix:
263 case para_Heading:
264 case para_Subsect:
265 /*
266 * For some heading styles (FIXME: be able to
267 * configure which), the auxiliary text contains
268 * the chapter number and is arranged to be
269 * right-aligned a few points left of the primary
270 * margin. For other styles, the auxiliary text is
271 * the full chapter _name_ and takes up space
272 * within the (wrapped) chapter title, meaning that
273 * we must move the first line indent over to make
274 * space for it.
275 */
276 if (p->type == para_Heading || p->type == para_Subsect) {
277 int len;
278
279 aux = p->kwtext2;
280 len = paper_width_simple(pdata, p->kwtext2);
281 aux_indent = -len - sect_num_left_space;
282 } else {
283 aux = p->kwtext;
284 aux2 = mknew(word);
285 aux2->next = NULL;
286 aux2->alt = NULL;
287 aux2->type = word_Normal;
288 aux2->text = ustrdup(L": ");
289 aux2->breaks = FALSE;
290 aux2->aux = 0;
291 aux_indent = 0;
292
293 firstline_indent += paper_width_simple(pdata, aux);
294 firstline_indent += paper_width_simple(pdata, aux2);
295 }
296 break;
297
298 case para_Bullet:
299 /*
300 * Auxiliary text consisting of a bullet. (FIXME:
301 * configurable bullet.)
302 */
303 aux = mknew(word);
304 aux->next = NULL;
305 aux->alt = NULL;
306 aux->type = word_Normal;
307 aux->text = ustrdup(L"\x2022");
308 aux->breaks = FALSE;
309 aux->aux = 0;
310 aux_indent = indent + indent_list_bullet;
311 break;
312
313 case para_NumberedList:
314 /*
315 * Auxiliary text consisting of the number followed
316 * by a (FIXME: configurable) full stop.
317 */
318 aux = p->kwtext;
319 aux2 = mknew(word);
320 aux2->next = NULL;
321 aux2->alt = NULL;
322 aux2->type = word_Normal;
323 aux2->text = ustrdup(L".");
324 aux2->breaks = FALSE;
325 aux2->aux = 0;
326 aux_indent = indent + indent_list_bullet;
327 break;
328
329 case para_BiblioCited:
330 /*
331 * Auxiliary text consisting of the bibliography
332 * reference text, and a trailing space.
333 */
334 aux = p->kwtext;
335 aux2 = mknew(word);
336 aux2->next = NULL;
337 aux2->alt = NULL;
338 aux2->type = word_Normal;
339 aux2->text = ustrdup(L" ");
340 aux2->breaks = FALSE;
341 aux2->aux = 0;
342 aux_indent = indent;
343 firstline_indent += paper_width_simple(pdata, aux);
344 firstline_indent += paper_width_simple(pdata, aux2);
345 break;
346 }
347
348 wrap_paragraph(pdata, p->words, base_width,
349 indent + firstline_indent,
350 indent + extra_indent);
351
352 p->private_data = pdata;
353
354 pdata->first->aux_text = aux;
355 pdata->first->aux_text_2 = aux2;
356 pdata->first->aux_left_indent = aux_indent;
357
358 /*
359 * Line breaking penalties.
360 */
361 switch (p->type) {
362 case para_Chapter:
363 case para_Appendix:
364 case para_Heading:
365 case para_Subsect:
366 case para_UnnumberedChapter:
367 /*
368 * Fixed and large penalty for breaking straight
369 * after a heading; corresponding bonus for
370 * breaking straight before.
371 */
372 pdata->first->penalty_before = -500000;
373 pdata->last->penalty_after = 500000;
374 for (ldata = pdata->first; ldata; ldata = ldata->next)
375 ldata->penalty_after = 500000;
376 break;
377
378 case para_DescribedThing:
379 /*
380 * This is treated a bit like a small heading:
381 * there's a penalty for breaking after it (i.e.
382 * between it and its description), and a bonus for
383 * breaking before it (actually _between_ list
384 * items).
385 */
386 pdata->first->penalty_before = -200000;
387 pdata->last->penalty_after = 200000;
388 break;
389
390 default:
391 /*
392 * Most paragraph types: widow/orphan control by
393 * discouraging breaking one line from the end of
394 * any paragraph.
395 */
396 if (pdata->first != pdata->last) {
397 pdata->first->penalty_after = 100000;
398 pdata->last->penalty_before = 100000;
399 }
400 break;
401 }
402
403 break;
404 }
405
406 if (p->private_data) {
407 pdata = (para_data *)p->private_data;
408
409 /*
410 * Set the line spacing for each line in this paragraph.
411 */
412 for (ldata = pdata->first; ldata; ldata = ldata->next) {
413 if (ldata == pdata->first)
414 ldata->space_before = base_para_spacing / 2;
415 else
416 ldata->space_before = base_leading / 2;
417 if (ldata == pdata->last)
418 ldata->space_after = base_para_spacing / 2;
419 else
420 ldata->space_after = base_leading / 2;
421 ldata->page_break = FALSE;
422 }
423
424 /*
425 * Some kinds of section heading do require a page
426 * break before them.
427 */
428 if (p->type == para_Title ||
429 p->type == para_Chapter ||
430 p->type == para_Appendix ||
431 p->type == para_UnnumberedChapter) {
432 pdata->first->page_break = TRUE;
433 pdata->first->space_before = chapter_top_space;
434 pdata->last->space_after +=
435 chapter_underline_depth + chapter_underline_thickness;
436 }
437
438 /*
439 * Link all line structures together into a big list.
440 */
441 if (pdata->first) {
442 if (lastline) {
443 lastline->next = pdata->first;
444 pdata->first->prev = lastline;
445 } else {
446 firstline = pdata->first;
447 pdata->first->prev = NULL;
448 }
449 lastline = pdata->last;
450 }
451 }
452 }
453
454 /*
455 * Now we have an enormous linked list of every line of text in
456 * the document. Break it up into pages.
457 */
458 pages = page_breaks(firstline, lastline, page_height);
459
460 /*
461 * Now we're ready to actually lay out the pages. We do this by
462 * looping over _paragraphs_, since we may need to track cross-
463 * references between lines and even across pages.
464 */
465 for (p = sourceform; p; p = p->next) {
466 pdata = (para_data *)p->private_data;
467
468 if (pdata) {
469 xref_dest dest;
470 dest.type = NONE;
471 for (ldata = pdata->first; ldata; ldata = ldata->next) {
472 render_line(ldata, left_margin, paper_height - top_margin,
473 &dest, keywords);
474 if (ldata == pdata->last)
475 break;
476 }
477
478 /*
479 * Some section headings (FIXME: should be configurable
480 * which) want to be underlined.
481 */
482 if (p->type == para_Chapter || p->type == para_Appendix ||
483 p->type == para_UnnumberedChapter || p->type == para_Title) {
484 add_rect_to_page(pdata->last->page,
485 left_margin,
486 (paper_height - top_margin -
487 pdata->last->ypos - chapter_underline_depth),
488 base_width,
489 chapter_underline_thickness);
490 }
491 }
492 }
493
494 doc = mknew(document);
495 doc->fonts = fontlist;
496 doc->pages = pages;
497 doc->paper_width = paper_width;
498 doc->paper_height = paper_height;
499 return doc;
500 }
501
502 static font_encoding *new_font_encoding(font_data *font)
503 {
504 font_encoding *fe;
505 int i;
506
507 fe = mknew(font_encoding);
508 fe->next = NULL;
509
510 if (font->list->tail)
511 font->list->tail->next = fe;
512 else
513 font->list->head = fe;
514 font->list->tail = fe;
515
516 fe->font = font;
517 fe->free_pos = 0x21;
518
519 for (i = 0; i < 256; i++) {
520 fe->vector[i] = NULL;
521 fe->indices[i] = -1;
522 fe->to_unicode[i] = 0xFFFF;
523 }
524
525 return fe;
526 }
527
528 static font_data *make_std_font(font_list *fontlist, char const *name)
529 {
530 const int *widths;
531 int nglyphs;
532 font_data *f;
533 font_encoding *fe;
534 int i;
535
536 widths = ps_std_font_widths(name);
537 if (!widths)
538 return NULL;
539
540 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
541
542 f = mknew(font_data);
543
544 f->list = fontlist;
545 f->name = name;
546 f->nglyphs = nglyphs;
547 f->glyphs = ps_std_glyphs;
548 f->widths = widths;
549 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
550
551 /*
552 * Our first subfont will contain all of US-ASCII. This isn't
553 * really necessary - we could just create custom subfonts
554 * precisely as the whim of render_string dictated - but
555 * instinct suggests that it might be nice to have the text in
556 * the output files look _marginally_ recognisable.
557 */
558 fe = new_font_encoding(f);
559 fe->free_pos = 0xA1; /* only the top half is free */
560 f->latest_subfont = fe;
561
562 for (i = 0; i < (int)lenof(f->bmp); i++)
563 f->bmp[i] = 0xFFFF;
564
565 for (i = 0; i < nglyphs; i++) {
566 wchar_t ucs;
567 ucs = ps_glyph_to_unicode(f->glyphs[i]);
568 assert(ucs != 0xFFFF);
569 f->bmp[ucs] = i;
570 if (ucs >= 0x20 && ucs <= 0x7E) {
571 fe->vector[ucs] = f->glyphs[i];
572 fe->indices[ucs] = i;
573 fe->to_unicode[ucs] = ucs;
574 f->subfont_map[i].subfont = fe;
575 f->subfont_map[i].position = ucs;
576 } else {
577 /*
578 * This character is not yet assigned to a subfont.
579 */
580 f->subfont_map[i].subfont = NULL;
581 f->subfont_map[i].position = 0;
582 }
583 }
584
585 return f;
586 }
587
588 static int string_width(font_data *font, wchar_t const *string, int *errs)
589 {
590 int width = 0;
591
592 if (errs)
593 *errs = 0;
594
595 for (; *string; string++) {
596 int index;
597
598 index = font->bmp[(unsigned short)*string];
599 if (index == 0xFFFF) {
600 if (errs)
601 *errs = 1;
602 } else {
603 width += font->widths[index];
604 }
605 }
606
607 return width;
608 }
609
610 static int paper_width_internal(void *vctx, word *word, int *nspaces);
611
612 struct paper_width_ctx {
613 int minspacewidth;
614 para_data *pdata;
615 };
616
617 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
618 int w = 0;
619 while (text && text != end) {
620 w += paper_width_internal(vctx, text, nspaces);
621 text = text->next;
622 }
623 return w;
624 }
625
626 static int paper_width_internal(void *vctx, word *word, int *nspaces)
627 {
628 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
629 int style, type, findex, width, errs;
630 wchar_t *str;
631
632 switch (word->type) {
633 case word_HyperLink:
634 case word_HyperEnd:
635 case word_UpperXref:
636 case word_LowerXref:
637 case word_XrefEnd:
638 case word_IndexRef:
639 return 0;
640 }
641
642 style = towordstyle(word->type);
643 type = removeattr(word->type);
644
645 findex = (style == word_Normal ? FONT_NORMAL :
646 style == word_Emph ? FONT_EMPH :
647 FONT_CODE);
648
649 if (type == word_Normal) {
650 str = word->text;
651 } else if (type == word_WhiteSpace) {
652 if (findex != FONT_CODE) {
653 if (nspaces)
654 (*nspaces)++;
655 return ctx->minspacewidth;
656 } else
657 str = L" ";
658 } else /* if (type == word_Quote) */ {
659 if (word->aux == quote_Open)
660 str = L"\x2018"; /* FIXME: configurability! */
661 else
662 str = L"\x2019"; /* FIXME: configurability! */
663 }
664
665 width = string_width(ctx->pdata->fonts[findex], str, &errs);
666
667 if (errs && word->alt)
668 return paper_width_list(vctx, word->alt, NULL, nspaces);
669 else
670 return ctx->pdata->sizes[findex] * width;
671 }
672
673 static int paper_width(void *vctx, word *word)
674 {
675 return paper_width_internal(vctx, word, NULL);
676 }
677
678 static int paper_width_simple(para_data *pdata, word *text)
679 {
680 struct paper_width_ctx ctx;
681
682 ctx.pdata = pdata;
683 ctx.minspacewidth =
684 (pdata->sizes[FONT_NORMAL] *
685 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
686
687 return paper_width_list(&ctx, text, NULL, NULL);
688 }
689
690 static void wrap_paragraph(para_data *pdata, word *words,
691 int w, int i1, int i2)
692 {
693 wrappedline *wrapping, *p;
694 int spacewidth;
695 struct paper_width_ctx ctx;
696 int line_height;
697
698 /*
699 * We're going to need to store the line height in every line
700 * structure we generate.
701 */
702 {
703 int i;
704 line_height = 0;
705 for (i = 0; i < NFONTS; i++)
706 if (line_height < pdata->sizes[i])
707 line_height = pdata->sizes[i];
708 line_height *= 4096;
709 }
710
711 spacewidth = (pdata->sizes[FONT_NORMAL] *
712 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
713 if (spacewidth == 0) {
714 /*
715 * A font without a space?! Disturbing. I hope this never
716 * comes up, but I'll make a random guess anyway and set my
717 * space width to half the point size.
718 */
719 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
720 }
721
722 /*
723 * I'm going to set the _minimum_ space width to 3/5 of the
724 * standard one, and use the standard one as the optimum.
725 */
726 ctx.minspacewidth = spacewidth * 3 / 5;
727 ctx.pdata = pdata;
728
729 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
730
731 /*
732 * Having done the wrapping, we now concoct a set of line_data
733 * structures.
734 */
735 pdata->first = pdata->last = NULL;
736
737 for (p = wrapping; p; p = p->next) {
738 line_data *ldata;
739 word *wd;
740 int len, wid, spaces;
741
742 ldata = mknew(line_data);
743
744 ldata->pdata = pdata;
745 ldata->first = p->begin;
746 ldata->end = p->end;
747 ldata->line_height = line_height;
748
749 ldata->xpos = (p == wrapping ? i1 : i2);
750
751 if (pdata->last) {
752 pdata->last->next = ldata;
753 ldata->prev = pdata->last;
754 } else {
755 pdata->first = ldata;
756 ldata->prev = NULL;
757 }
758 ldata->next = NULL;
759 pdata->last = ldata;
760
761 spaces = 0;
762 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
763 wid = (p == wrapping ? w - i1 : w - i2);
764 wd = ldata->first;
765
766 ldata->hshortfall = wid - len;
767 ldata->nspaces = spaces;
768 /*
769 * This tells us how much the space width needs to
770 * change from _min_spacewidth. But we want to store
771 * its difference from the _natural_ space width, to
772 * make the text rendering easier.
773 */
774 ldata->hshortfall += ctx.minspacewidth * spaces;
775 ldata->hshortfall -= spacewidth * spaces;
776 /*
777 * Special case: on the last line of a paragraph, we
778 * never stretch spaces.
779 */
780 if (ldata->hshortfall > 0 && !p->next)
781 ldata->hshortfall = 0;
782
783 ldata->aux_text = NULL;
784 ldata->aux_text_2 = NULL;
785 ldata->aux_left_indent = 0;
786 ldata->penalty_before = ldata->penalty_after = 0;
787 }
788
789 }
790
791 static page_data *page_breaks(line_data *first, line_data *last,
792 int page_height)
793 {
794 line_data *l, *m;
795 page_data *ph, *pt;
796
797 /*
798 * Page breaking is done by a close analogue of the optimal
799 * paragraph wrapping algorithm used by wrap_para(). We work
800 * backwards from the end of the document line by line; for
801 * each line, we contemplate every possible number of lines we
802 * could put on a page starting with that line, determine a
803 * cost function for each one, add it to the pre-computed cost
804 * function for optimally page-breaking everything after that
805 * page, and pick the best option.
806 *
807 * Since my line_data structures are only used for this
808 * purpose, I might as well just store the algorithm data
809 * directly in them.
810 */
811
812 for (l = last; l; l = l->prev) {
813 int minheight, text = 0, space = 0;
814 int cost;
815
816 l->bestcost = -1;
817 for (m = l; m; m = m->next) {
818 if (m != l && m->page_break)
819 break; /* we've gone as far as we can */
820
821 if (m != l)
822 space += m->prev->space_after;
823 if (m != l || m->page_break)
824 space += m->space_before;
825 text += m->line_height;
826 minheight = text + space;
827
828 if (m != l && minheight > page_height)
829 break;
830
831 /*
832 * Compute the cost of this arrangement, as the square
833 * of the amount of wasted space on the page.
834 * Exception: if this is the last page before a
835 * mandatory break or the document end, we don't
836 * penalise a large blank area.
837 */
838 if (m->next && !m->next->page_break)
839 {
840 int x = page_height - minheight;
841 int xf;
842
843 xf = x & 0xFF;
844 x >>= 8;
845
846 cost = x*x;
847 cost += (x * xf) >> 8;
848 } else
849 cost = 0;
850
851 if (m->next && !m->next->page_break) {
852 cost += m->penalty_after;
853 cost += m->next->penalty_before;
854 }
855
856 if (m->next && !m->next->page_break)
857 cost += m->next->bestcost;
858 if (l->bestcost == -1 || l->bestcost > cost) {
859 /*
860 * This is the best option yet for this starting
861 * point.
862 */
863 l->bestcost = cost;
864 if (m->next && !m->next->page_break)
865 l->vshortfall = page_height - minheight;
866 else
867 l->vshortfall = 0;
868 l->text = text;
869 l->space = space;
870 l->page_last = m;
871 }
872 }
873 }
874
875 /*
876 * Now go through the line list forwards and assemble the
877 * actual pages.
878 */
879 ph = pt = NULL;
880
881 l = first;
882 while (l) {
883 page_data *page;
884 int text, space;
885
886 page = mknew(page_data);
887 page->next = NULL;
888 page->prev = pt;
889 if (pt)
890 pt->next = page;
891 else
892 ph = page;
893 pt = page;
894
895 page->first_line = l;
896 page->last_line = l->page_last;
897
898 page->first_text = page->last_text = NULL;
899 page->first_xref = page->last_xref = NULL;
900 page->first_rect = page->last_rect = NULL;
901
902 /*
903 * Now assign a y-coordinate to each line on the page.
904 */
905 text = space = 0;
906 for (l = page->first_line; l; l = l->next) {
907 if (l != page->first_line)
908 space += l->prev->space_after;
909 if (l != page->first_line || l->page_break)
910 space += l->space_before;
911 text += l->line_height;
912
913 l->page = page;
914 l->ypos = text + space +
915 space * (float)page->first_line->vshortfall /
916 page->first_line->space;
917
918 if (l == page->last_line)
919 break;
920 }
921
922 l = page->last_line->next;
923 }
924
925 return ph;
926 }
927
928 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
929 {
930 rect *r = mknew(rect);
931
932 r->next = NULL;
933 if (page->last_rect)
934 page->last_rect->next = r;
935 else
936 page->first_rect = r;
937 page->last_rect = r;
938
939 r->x = x;
940 r->y = y;
941 r->w = w;
942 r->h = h;
943 }
944
945 static void add_string_to_page(page_data *page, int x, int y,
946 font_encoding *fe, int size, char *text)
947 {
948 text_fragment *frag;
949
950 frag = mknew(text_fragment);
951 frag->next = NULL;
952
953 if (page->last_text)
954 page->last_text->next = frag;
955 else
956 page->first_text = frag;
957 page->last_text = frag;
958
959 frag->x = x;
960 frag->y = y;
961 frag->fe = fe;
962 frag->fontsize = size;
963 frag->text = dupstr(text);
964 }
965
966 /*
967 * Returns the updated x coordinate.
968 */
969 static int render_string(page_data *page, font_data *font, int fontsize,
970 int x, int y, wchar_t *str)
971 {
972 char *text;
973 int textpos, textwid, glyph;
974 font_encoding *subfont = NULL, *sf;
975
976 text = mknewa(char, 1 + ustrlen(str));
977 textpos = textwid = 0;
978
979 while (*str) {
980 glyph = font->bmp[*str];
981
982 if (glyph == 0xFFFF)
983 continue; /* nothing more we can do here */
984
985 /*
986 * Find which subfont this character is going in.
987 */
988 sf = font->subfont_map[glyph].subfont;
989
990 if (!sf) {
991 int c;
992
993 /*
994 * This character is not yet in a subfont. Assign one.
995 */
996 if (font->latest_subfont->free_pos >= 0x100)
997 font->latest_subfont = new_font_encoding(font);
998
999 c = font->latest_subfont->free_pos++;
1000 if (font->latest_subfont->free_pos == 0x7F)
1001 font->latest_subfont->free_pos = 0xA1;
1002
1003 font->subfont_map[glyph].subfont = font->latest_subfont;
1004 font->subfont_map[glyph].position = c;
1005 font->latest_subfont->vector[c] = font->glyphs[glyph];
1006 font->latest_subfont->indices[c] = glyph;
1007 font->latest_subfont->to_unicode[c] = *str;
1008
1009 sf = font->latest_subfont;
1010 }
1011
1012 if (!subfont || sf != subfont) {
1013 if (subfont) {
1014 text[textpos] = '\0';
1015 add_string_to_page(page, x, y, subfont, fontsize, text);
1016 x += textwid;
1017 } else {
1018 assert(textpos == 0);
1019 }
1020 textpos = 0;
1021 subfont = sf;
1022 }
1023
1024 text[textpos++] = font->subfont_map[glyph].position;
1025 textwid += font->widths[glyph] * fontsize;
1026
1027 str++;
1028 }
1029
1030 if (textpos > 0) {
1031 text[textpos] = '\0';
1032 add_string_to_page(page, x, y, subfont, fontsize, text);
1033 x += textwid;
1034 }
1035
1036 return x;
1037 }
1038
1039 /*
1040 * Returns the updated x coordinate.
1041 */
1042 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
1043 int x, int y, word *text, word *text_end, xref **xr,
1044 int shortfall, int nspaces, int *nspace,
1045 keywordlist *keywords)
1046 {
1047 while (text && text != text_end) {
1048 int style, type, findex, errs;
1049 wchar_t *str;
1050 xref_dest dest;
1051
1052 switch (text->type) {
1053 /*
1054 * Start a cross-reference.
1055 */
1056 case word_HyperLink:
1057 case word_UpperXref:
1058 case word_LowerXref:
1059
1060 if (text->type == word_HyperLink) {
1061 dest.type = URL;
1062 dest.url = utoa_dup(text->text);
1063 dest.page = NULL;
1064 } else {
1065 keyword *kwl = kw_lookup(keywords, text->text);
1066 para_data *pdata;
1067
1068 if (kwl) {
1069 assert(kwl->para->private_data);
1070 pdata = (para_data *) kwl->para->private_data;
1071 dest.type = PAGE;
1072 dest.page = pdata->first->page;
1073 dest.url = NULL;
1074 } else {
1075 /*
1076 * Shouldn't happen, but *shrug*
1077 */
1078 dest.type = NONE;
1079 dest.page = NULL;
1080 dest.url = NULL;
1081 }
1082 }
1083 if (dest.type != NONE) {
1084 *xr = mknew(xref);
1085 (*xr)->dest = dest; /* structure copy */
1086 if (page->last_xref)
1087 page->last_xref->next = *xr;
1088 else
1089 page->first_xref = *xr;
1090 page->last_xref = *xr;
1091 (*xr)->next = NULL;
1092
1093 /*
1094 * FIXME: Ideally we should have, and use, some
1095 * vertical font metric information here so that
1096 * our cross-ref rectangle can take account of
1097 * descenders and the font's cap height. This will
1098 * do for the moment, but it isn't ideal.
1099 */
1100 (*xr)->lx = (*xr)->rx = x;
1101 (*xr)->by = y;
1102 (*xr)->ty = y + ldata->line_height;
1103 }
1104 goto nextword;
1105
1106 /*
1107 * Finish extending a cross-reference box.
1108 */
1109 case word_HyperEnd:
1110 case word_XrefEnd:
1111 *xr = NULL;
1112 goto nextword;
1113
1114 case word_IndexRef:
1115 goto nextword;
1116 /*
1117 * FIXME: we should do something with this.
1118 */
1119 }
1120
1121 style = towordstyle(text->type);
1122 type = removeattr(text->type);
1123
1124 findex = (style == word_Normal ? FONT_NORMAL :
1125 style == word_Emph ? FONT_EMPH :
1126 FONT_CODE);
1127
1128 if (type == word_Normal) {
1129 str = text->text;
1130 } else if (type == word_WhiteSpace) {
1131 x += pdata->sizes[findex] *
1132 string_width(pdata->fonts[findex], L" ", NULL);
1133 if (nspaces && findex != FONT_CODE) {
1134 x += (*nspace+1) * shortfall / nspaces;
1135 x -= *nspace * shortfall / nspaces;
1136 (*nspace)++;
1137 }
1138 goto nextword;
1139 } else /* if (type == word_Quote) */ {
1140 if (text->aux == quote_Open)
1141 str = L"\x2018"; /* FIXME: configurability! */
1142 else
1143 str = L"\x2019"; /* FIXME: configurability! */
1144 }
1145
1146 (void) string_width(pdata->fonts[findex], str, &errs);
1147
1148 if (errs && text->alt)
1149 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1150 xr, shortfall, nspaces, nspace, keywords);
1151 else
1152 x = render_string(page, pdata->fonts[findex],
1153 pdata->sizes[findex], x, y, str);
1154
1155 if (*xr)
1156 (*xr)->rx = x;
1157
1158 nextword:
1159 text = text->next;
1160 }
1161
1162 return x;
1163 }
1164
1165 static void render_line(line_data *ldata, int left_x, int top_y,
1166 xref_dest *dest, keywordlist *keywords)
1167 {
1168 int nspace;
1169 xref *xr;
1170
1171 if (ldata->aux_text) {
1172 int x;
1173 xr = NULL;
1174 nspace = 0;
1175 x = render_text(ldata->page, ldata->pdata, ldata,
1176 left_x + ldata->aux_left_indent,
1177 top_y - ldata->ypos,
1178 ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1179 if (ldata->aux_text_2)
1180 render_text(ldata->page, ldata->pdata, ldata,
1181 x, top_y - ldata->ypos,
1182 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1183 }
1184 nspace = 0;
1185
1186 /*
1187 * There might be a cross-reference carried over from a
1188 * previous line.
1189 */
1190 if (dest->type != NONE) {
1191 xr = mknew(xref);
1192 xr->next = NULL;
1193 xr->dest = *dest; /* structure copy */
1194 if (ldata->page->last_xref)
1195 ldata->page->last_xref->next = xr;
1196 else
1197 ldata->page->first_xref = xr;
1198 ldata->page->last_xref = xr;
1199 xr->lx = xr->rx = left_x + ldata->xpos;
1200 xr->by = top_y - ldata->ypos;
1201 xr->ty = top_y - ldata->ypos + ldata->line_height;
1202 } else
1203 xr = NULL;
1204
1205 render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1206 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1207 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1208
1209 if (xr) {
1210 /*
1211 * There's a cross-reference continued on to the next line.
1212 */
1213 *dest = xr->dest;
1214 } else
1215 dest->type = NONE;
1216 }
1217
1218 static void code_paragraph(para_data *pdata,
1219 font_data *fn, font_data *fi, font_data *fb,
1220 int font_size, int indent, word *words)
1221 {
1222 /*
1223 * For code paragraphs, I'm going to hack grievously and
1224 * pretend the three normal fonts are the three code paragraph
1225 * fonts.
1226 */
1227 pdata->fonts[FONT_NORMAL] = fb;
1228 pdata->fonts[FONT_EMPH] = fi;
1229 pdata->fonts[FONT_CODE] = fn;
1230 pdata->sizes[FONT_NORMAL] =
1231 pdata->sizes[FONT_EMPH] =
1232 pdata->sizes[FONT_CODE] = font_size;
1233
1234 pdata->first = pdata->last = NULL;
1235
1236 for (; words; words = words->next) {
1237 wchar_t *t, *e, *start;
1238 word *lhead = NULL, *ltail = NULL, *w;
1239 line_data *ldata;
1240 int prev = -1, curr;
1241
1242 t = words->text;
1243 if (words->next && words->next->type == word_Emph) {
1244 e = words->next->text;
1245 words = words->next;
1246 } else
1247 e = NULL;
1248
1249 start = t;
1250
1251 while (*start) {
1252 while (*t) {
1253 if (!e || !*e)
1254 curr = 0;
1255 else if (*e == L'i')
1256 curr = 1;
1257 else if (*e == L'b')
1258 curr = 2;
1259 else
1260 curr = 0;
1261
1262 if (prev < 0)
1263 prev = curr;
1264
1265 if (curr != prev)
1266 break;
1267
1268 t++;
1269 if (e && *e)
1270 e++;
1271 }
1272
1273 /*
1274 * We've isolated a maximal subsequence of the line
1275 * which has the same emphasis. Form it into a word
1276 * structure.
1277 */
1278 w = mknew(word);
1279 w->next = NULL;
1280 w->alt = NULL;
1281 w->type = (prev == 0 ? word_WeakCode :
1282 prev == 1 ? word_Emph : word_Normal);
1283 w->text = mknewa(wchar_t, t-start+1);
1284 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1285 w->text[t-start] = '\0';
1286 w->breaks = FALSE;
1287
1288 if (ltail)
1289 ltail->next = w;
1290 else
1291 lhead = w;
1292 ltail = w;
1293
1294 start = t;
1295 prev = -1;
1296 }
1297
1298 ldata = mknew(line_data);
1299
1300 ldata->pdata = pdata;
1301 ldata->first = lhead;
1302 ldata->end = NULL;
1303 ldata->line_height = font_size * 4096;
1304
1305 ldata->xpos = indent;
1306
1307 if (pdata->last) {
1308 pdata->last->next = ldata;
1309 ldata->prev = pdata->last;
1310 } else {
1311 pdata->first = ldata;
1312 ldata->prev = NULL;
1313 }
1314 ldata->next = NULL;
1315 pdata->last = ldata;
1316
1317 ldata->hshortfall = 0;
1318 ldata->nspaces = 0;
1319 ldata->aux_text = NULL;
1320 ldata->aux_text_2 = NULL;
1321 ldata->aux_left_indent = 0;
1322 /* General opprobrium for breaking in a code paragraph. */
1323 ldata->penalty_before = ldata->penalty_after = 50000;
1324 }
1325 }