Implemented horizontal rules.
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - set up contents section now we know what sections begin on
16 * which pages
17 *
18 * - do PDF outline
19 *
20 * - index
21 *
22 * - header/footer? Page numbers at least would be handy. Fully
23 * configurable footer can wait, though.
24 *
25 * That should bring us to the same level of functionality that
26 * original-Halibut had, and the same in PDF plus the obvious
27 * interactive navigation features. After that, in future work:
28 *
29 * - linearised PDF, perhaps?
30 *
31 * - I'm uncertain of whether I need to include a ToUnicode CMap
32 * in each of my font definitions in PDF. Currently things (by
33 * which I mean cut and paste out of acroread) seem to be
34 * working fairly happily without it, but I don't know.
35 *
36 * - configurability
37 *
38 * - title pages
39 */
40
41 #include <assert.h>
42 #include <stdio.h>
43
44 #include "halibut.h"
45 #include "paper.h"
46
47 static font_data *make_std_font(font_list *fontlist, char const *name);
48 static void wrap_paragraph(para_data *pdata, word *words,
49 int w, int i1, int i2);
50 static page_data *page_breaks(line_data *first, line_data *last,
51 int page_height);
52 static void render_line(line_data *ldata, int left_x, int top_y,
53 xref_dest *dest, keywordlist *keywords);
54 static int paper_width_simple(para_data *pdata, word *text);
55 static void code_paragraph(para_data *pdata,
56 font_data *fn, font_data *fi, font_data *fb,
57 int font_size, int indent, word *words);
58 static void rule_paragraph(para_data *pdata, int indent, int height);
59 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
60
61 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
62 indexdata *idx) {
63 paragraph *p;
64 document *doc;
65 int indent, extra_indent, firstline_indent, aux_indent;
66 para_data *pdata;
67 line_data *ldata, *firstline, *lastline;
68 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
69 page_data *pages;
70 font_list *fontlist;
71 word *aux, *aux2;
72
73 /*
74 * FIXME: All these things ought to become configurable.
75 */
76 int paper_width = 595 * 4096;
77 int paper_height = 841 * 4096;
78 int left_margin = 72 * 4096;
79 int top_margin = 72 * 4096;
80 int right_margin = 72 * 4096;
81 int bottom_margin = 108 * 4096;
82 int indent_list_bullet = 6 * 4096;
83 int indent_list = 24 * 4096;
84 int indent_quote = 18 * 4096;
85 int base_leading = 4096;
86 int base_para_spacing = 10 * 4096;
87 int chapter_top_space = 72 * 4096;
88 int sect_num_left_space = 12 * 4096;
89 int chapter_underline_depth = 14 * 4096;
90 int chapter_underline_thickness = 3 * 4096;
91 int rule_thickness = 1 * 4096;
92
93 int base_width = paper_width - left_margin - right_margin;
94 int page_height = paper_height - top_margin - bottom_margin;
95
96 IGNORE(idx); /* FIXME */
97
98 /*
99 * First, set up some font structures.
100 */
101 fontlist = mknew(font_list);
102 fontlist->head = fontlist->tail = NULL;
103 tr = make_std_font(fontlist, "Times-Roman");
104 ti = make_std_font(fontlist, "Times-Italic");
105 hr = make_std_font(fontlist, "Helvetica-Bold");
106 hi = make_std_font(fontlist, "Helvetica-BoldOblique");
107 cr = make_std_font(fontlist, "Courier");
108 co = make_std_font(fontlist, "Courier-Oblique");
109 cb = make_std_font(fontlist, "Courier-Bold");
110
111 /*
112 * Go through and break up each paragraph into lines.
113 */
114 indent = 0;
115 firstline = lastline = NULL;
116 for (p = sourceform; p; p = p->next) {
117 p->private_data = NULL;
118
119 switch (p->type) {
120 /*
121 * These paragraph types are either invisible or don't
122 * define text in the normal sense. Either way, they
123 * don't require wrapping.
124 */
125 case para_IM:
126 case para_BR:
127 case para_Biblio:
128 case para_NotParaType:
129 case para_Config:
130 case para_VersionID:
131 case para_NoCite:
132 break;
133
134 /*
135 * These paragraph types don't require wrapping, but
136 * they do affect the line width to which we wrap the
137 * rest of the paragraphs, so we need to pay attention.
138 */
139 case para_LcontPush:
140 indent += indent_list; break;
141 case para_LcontPop:
142 indent -= indent_list; assert(indent >= 0); break;
143 case para_QuotePush:
144 indent += indent_quote; break;
145 case para_QuotePop:
146 indent -= indent_quote; assert(indent >= 0); break;
147
148 /*
149 * This paragraph type is special. Process it
150 * specially.
151 */
152 case para_Code:
153 pdata = mknew(para_data);
154 code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
155 p->private_data = pdata;
156 if (pdata->first != pdata->last) {
157 pdata->first->penalty_after += 100000;
158 pdata->last->penalty_before += 100000;
159 }
160 break;
161
162 /*
163 * This paragraph is also special.
164 */
165 case para_Rule:
166 pdata = mknew(para_data);
167 rule_paragraph(pdata, indent, rule_thickness);
168 p->private_data = pdata;
169 break;
170
171 /*
172 * All of these paragraph types require wrapping in the
173 * ordinary way. So we must supply a set of fonts, a
174 * line width and auxiliary information (e.g. bullet
175 * text) for each one.
176 */
177 case para_Chapter:
178 case para_Appendix:
179 case para_UnnumberedChapter:
180 case para_Heading:
181 case para_Subsect:
182 case para_Normal:
183 case para_BiblioCited:
184 case para_Bullet:
185 case para_NumberedList:
186 case para_DescribedThing:
187 case para_Description:
188 case para_Copyright:
189 case para_Title:
190 pdata = mknew(para_data);
191
192 /*
193 * Choose fonts for this paragraph.
194 *
195 * FIXME: All of this ought to be completely
196 * user-configurable.
197 */
198 switch (p->type) {
199 case para_Title:
200 pdata->fonts[FONT_NORMAL] = hr;
201 pdata->sizes[FONT_NORMAL] = 24;
202 pdata->fonts[FONT_EMPH] = hi;
203 pdata->sizes[FONT_EMPH] = 24;
204 pdata->fonts[FONT_CODE] = cb;
205 pdata->sizes[FONT_CODE] = 24;
206 break;
207
208 case para_Chapter:
209 case para_Appendix:
210 case para_UnnumberedChapter:
211 pdata->fonts[FONT_NORMAL] = hr;
212 pdata->sizes[FONT_NORMAL] = 20;
213 pdata->fonts[FONT_EMPH] = hi;
214 pdata->sizes[FONT_EMPH] = 20;
215 pdata->fonts[FONT_CODE] = cb;
216 pdata->sizes[FONT_CODE] = 20;
217 break;
218
219 case para_Heading:
220 case para_Subsect:
221 pdata->fonts[FONT_NORMAL] = hr;
222 pdata->fonts[FONT_EMPH] = hi;
223 pdata->fonts[FONT_CODE] = cb;
224 pdata->sizes[FONT_NORMAL] =
225 pdata->sizes[FONT_EMPH] =
226 pdata->sizes[FONT_CODE] =
227 (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
228 break;
229
230 case para_Normal:
231 case para_BiblioCited:
232 case para_Bullet:
233 case para_NumberedList:
234 case para_DescribedThing:
235 case para_Description:
236 case para_Copyright:
237 pdata->fonts[FONT_NORMAL] = tr;
238 pdata->sizes[FONT_NORMAL] = 12;
239 pdata->fonts[FONT_EMPH] = ti;
240 pdata->sizes[FONT_EMPH] = 12;
241 pdata->fonts[FONT_CODE] = cr;
242 pdata->sizes[FONT_CODE] = 12;
243 break;
244 }
245
246 /*
247 * Also select an indentation level depending on the
248 * paragraph type (list paragraphs other than
249 * para_DescribedThing need extra indent).
250 *
251 * (FIXME: Perhaps at some point we might even arrange
252 * for the user to be able to request indented first
253 * lines in paragraphs.)
254 */
255 if (p->type == para_Bullet ||
256 p->type == para_NumberedList ||
257 p->type == para_Description) {
258 extra_indent = firstline_indent = indent_list;
259 } else {
260 extra_indent = firstline_indent = 0;
261 }
262
263 /*
264 * Find the auxiliary text for this paragraph.
265 */
266 aux = aux2 = NULL;
267 aux_indent = 0;
268
269 switch (p->type) {
270 case para_Chapter:
271 case para_Appendix:
272 case para_Heading:
273 case para_Subsect:
274 /*
275 * For some heading styles (FIXME: be able to
276 * configure which), the auxiliary text contains
277 * the chapter number and is arranged to be
278 * right-aligned a few points left of the primary
279 * margin. For other styles, the auxiliary text is
280 * the full chapter _name_ and takes up space
281 * within the (wrapped) chapter title, meaning that
282 * we must move the first line indent over to make
283 * space for it.
284 */
285 if (p->type == para_Heading || p->type == para_Subsect) {
286 int len;
287
288 aux = p->kwtext2;
289 len = paper_width_simple(pdata, p->kwtext2);
290 aux_indent = -len - sect_num_left_space;
291 } else {
292 aux = p->kwtext;
293 aux2 = mknew(word);
294 aux2->next = NULL;
295 aux2->alt = NULL;
296 aux2->type = word_Normal;
297 aux2->text = ustrdup(L": ");
298 aux2->breaks = FALSE;
299 aux2->aux = 0;
300 aux_indent = 0;
301
302 firstline_indent += paper_width_simple(pdata, aux);
303 firstline_indent += paper_width_simple(pdata, aux2);
304 }
305 break;
306
307 case para_Bullet:
308 /*
309 * Auxiliary text consisting of a bullet. (FIXME:
310 * configurable bullet.)
311 */
312 aux = mknew(word);
313 aux->next = NULL;
314 aux->alt = NULL;
315 aux->type = word_Normal;
316 aux->text = ustrdup(L"\x2022");
317 aux->breaks = FALSE;
318 aux->aux = 0;
319 aux_indent = indent + indent_list_bullet;
320 break;
321
322 case para_NumberedList:
323 /*
324 * Auxiliary text consisting of the number followed
325 * by a (FIXME: configurable) full stop.
326 */
327 aux = p->kwtext;
328 aux2 = mknew(word);
329 aux2->next = NULL;
330 aux2->alt = NULL;
331 aux2->type = word_Normal;
332 aux2->text = ustrdup(L".");
333 aux2->breaks = FALSE;
334 aux2->aux = 0;
335 aux_indent = indent + indent_list_bullet;
336 break;
337
338 case para_BiblioCited:
339 /*
340 * Auxiliary text consisting of the bibliography
341 * reference text, and a trailing space.
342 */
343 aux = p->kwtext;
344 aux2 = mknew(word);
345 aux2->next = NULL;
346 aux2->alt = NULL;
347 aux2->type = word_Normal;
348 aux2->text = ustrdup(L" ");
349 aux2->breaks = FALSE;
350 aux2->aux = 0;
351 aux_indent = indent;
352 firstline_indent += paper_width_simple(pdata, aux);
353 firstline_indent += paper_width_simple(pdata, aux2);
354 break;
355 }
356
357 wrap_paragraph(pdata, p->words, base_width,
358 indent + firstline_indent,
359 indent + extra_indent);
360
361 p->private_data = pdata;
362
363 pdata->first->aux_text = aux;
364 pdata->first->aux_text_2 = aux2;
365 pdata->first->aux_left_indent = aux_indent;
366
367 /*
368 * Line breaking penalties.
369 */
370 switch (p->type) {
371 case para_Chapter:
372 case para_Appendix:
373 case para_Heading:
374 case para_Subsect:
375 case para_UnnumberedChapter:
376 /*
377 * Fixed and large penalty for breaking straight
378 * after a heading; corresponding bonus for
379 * breaking straight before.
380 */
381 pdata->first->penalty_before = -500000;
382 pdata->last->penalty_after = 500000;
383 for (ldata = pdata->first; ldata; ldata = ldata->next)
384 ldata->penalty_after = 500000;
385 break;
386
387 case para_DescribedThing:
388 /*
389 * This is treated a bit like a small heading:
390 * there's a penalty for breaking after it (i.e.
391 * between it and its description), and a bonus for
392 * breaking before it (actually _between_ list
393 * items).
394 */
395 pdata->first->penalty_before = -200000;
396 pdata->last->penalty_after = 200000;
397 break;
398
399 default:
400 /*
401 * Most paragraph types: widow/orphan control by
402 * discouraging breaking one line from the end of
403 * any paragraph.
404 */
405 if (pdata->first != pdata->last) {
406 pdata->first->penalty_after = 100000;
407 pdata->last->penalty_before = 100000;
408 }
409 break;
410 }
411
412 break;
413 }
414
415 if (p->private_data) {
416 pdata = (para_data *)p->private_data;
417
418 /*
419 * Set the line spacing for each line in this paragraph.
420 */
421 for (ldata = pdata->first; ldata; ldata = ldata->next) {
422 if (ldata == pdata->first)
423 ldata->space_before = base_para_spacing / 2;
424 else
425 ldata->space_before = base_leading / 2;
426 if (ldata == pdata->last)
427 ldata->space_after = base_para_spacing / 2;
428 else
429 ldata->space_after = base_leading / 2;
430 ldata->page_break = FALSE;
431 }
432
433 /*
434 * Some kinds of section heading do require a page
435 * break before them.
436 */
437 if (p->type == para_Title ||
438 p->type == para_Chapter ||
439 p->type == para_Appendix ||
440 p->type == para_UnnumberedChapter) {
441 pdata->first->page_break = TRUE;
442 pdata->first->space_before = chapter_top_space;
443 pdata->last->space_after +=
444 chapter_underline_depth + chapter_underline_thickness;
445 }
446
447 /*
448 * Link all line structures together into a big list.
449 */
450 if (pdata->first) {
451 if (lastline) {
452 lastline->next = pdata->first;
453 pdata->first->prev = lastline;
454 } else {
455 firstline = pdata->first;
456 pdata->first->prev = NULL;
457 }
458 lastline = pdata->last;
459 }
460 }
461 }
462
463 /*
464 * Now we have an enormous linked list of every line of text in
465 * the document. Break it up into pages.
466 */
467 pages = page_breaks(firstline, lastline, page_height);
468
469 /*
470 * Now we're ready to actually lay out the pages. We do this by
471 * looping over _paragraphs_, since we may need to track cross-
472 * references between lines and even across pages.
473 */
474 for (p = sourceform; p; p = p->next) {
475 pdata = (para_data *)p->private_data;
476
477 if (pdata) {
478 xref_dest dest;
479 dest.type = NONE;
480 for (ldata = pdata->first; ldata; ldata = ldata->next) {
481 render_line(ldata, left_margin, paper_height - top_margin,
482 &dest, keywords);
483 if (ldata == pdata->last)
484 break;
485 }
486
487 /*
488 * Some section headings (FIXME: should be configurable
489 * which) want to be underlined.
490 */
491 if (p->type == para_Chapter || p->type == para_Appendix ||
492 p->type == para_UnnumberedChapter || p->type == para_Title) {
493 add_rect_to_page(pdata->last->page,
494 left_margin,
495 (paper_height - top_margin -
496 pdata->last->ypos - chapter_underline_depth),
497 base_width,
498 chapter_underline_thickness);
499 }
500
501 /*
502 * Rule paragraphs need to contain an actual rule!
503 */
504 if (p->type == para_Rule) {
505 add_rect_to_page(pdata->first->page,
506 left_margin + pdata->first->xpos,
507 (paper_height - top_margin -
508 pdata->last->ypos -
509 pdata->last->line_height),
510 base_width - pdata->first->xpos,
511 pdata->last->line_height);
512 }
513 }
514 }
515
516 doc = mknew(document);
517 doc->fonts = fontlist;
518 doc->pages = pages;
519 doc->paper_width = paper_width;
520 doc->paper_height = paper_height;
521 return doc;
522 }
523
524 static font_encoding *new_font_encoding(font_data *font)
525 {
526 font_encoding *fe;
527 int i;
528
529 fe = mknew(font_encoding);
530 fe->next = NULL;
531
532 if (font->list->tail)
533 font->list->tail->next = fe;
534 else
535 font->list->head = fe;
536 font->list->tail = fe;
537
538 fe->font = font;
539 fe->free_pos = 0x21;
540
541 for (i = 0; i < 256; i++) {
542 fe->vector[i] = NULL;
543 fe->indices[i] = -1;
544 fe->to_unicode[i] = 0xFFFF;
545 }
546
547 return fe;
548 }
549
550 static font_data *make_std_font(font_list *fontlist, char const *name)
551 {
552 const int *widths;
553 int nglyphs;
554 font_data *f;
555 font_encoding *fe;
556 int i;
557
558 widths = ps_std_font_widths(name);
559 if (!widths)
560 return NULL;
561
562 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
563
564 f = mknew(font_data);
565
566 f->list = fontlist;
567 f->name = name;
568 f->nglyphs = nglyphs;
569 f->glyphs = ps_std_glyphs;
570 f->widths = widths;
571 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
572
573 /*
574 * Our first subfont will contain all of US-ASCII. This isn't
575 * really necessary - we could just create custom subfonts
576 * precisely as the whim of render_string dictated - but
577 * instinct suggests that it might be nice to have the text in
578 * the output files look _marginally_ recognisable.
579 */
580 fe = new_font_encoding(f);
581 fe->free_pos = 0xA1; /* only the top half is free */
582 f->latest_subfont = fe;
583
584 for (i = 0; i < (int)lenof(f->bmp); i++)
585 f->bmp[i] = 0xFFFF;
586
587 for (i = 0; i < nglyphs; i++) {
588 wchar_t ucs;
589 ucs = ps_glyph_to_unicode(f->glyphs[i]);
590 assert(ucs != 0xFFFF);
591 f->bmp[ucs] = i;
592 if (ucs >= 0x20 && ucs <= 0x7E) {
593 fe->vector[ucs] = f->glyphs[i];
594 fe->indices[ucs] = i;
595 fe->to_unicode[ucs] = ucs;
596 f->subfont_map[i].subfont = fe;
597 f->subfont_map[i].position = ucs;
598 } else {
599 /*
600 * This character is not yet assigned to a subfont.
601 */
602 f->subfont_map[i].subfont = NULL;
603 f->subfont_map[i].position = 0;
604 }
605 }
606
607 return f;
608 }
609
610 static int string_width(font_data *font, wchar_t const *string, int *errs)
611 {
612 int width = 0;
613
614 if (errs)
615 *errs = 0;
616
617 for (; *string; string++) {
618 int index;
619
620 index = font->bmp[(unsigned short)*string];
621 if (index == 0xFFFF) {
622 if (errs)
623 *errs = 1;
624 } else {
625 width += font->widths[index];
626 }
627 }
628
629 return width;
630 }
631
632 static int paper_width_internal(void *vctx, word *word, int *nspaces);
633
634 struct paper_width_ctx {
635 int minspacewidth;
636 para_data *pdata;
637 };
638
639 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
640 int w = 0;
641 while (text && text != end) {
642 w += paper_width_internal(vctx, text, nspaces);
643 text = text->next;
644 }
645 return w;
646 }
647
648 static int paper_width_internal(void *vctx, word *word, int *nspaces)
649 {
650 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
651 int style, type, findex, width, errs;
652 wchar_t *str;
653
654 switch (word->type) {
655 case word_HyperLink:
656 case word_HyperEnd:
657 case word_UpperXref:
658 case word_LowerXref:
659 case word_XrefEnd:
660 case word_IndexRef:
661 return 0;
662 }
663
664 style = towordstyle(word->type);
665 type = removeattr(word->type);
666
667 findex = (style == word_Normal ? FONT_NORMAL :
668 style == word_Emph ? FONT_EMPH :
669 FONT_CODE);
670
671 if (type == word_Normal) {
672 str = word->text;
673 } else if (type == word_WhiteSpace) {
674 if (findex != FONT_CODE) {
675 if (nspaces)
676 (*nspaces)++;
677 return ctx->minspacewidth;
678 } else
679 str = L" ";
680 } else /* if (type == word_Quote) */ {
681 if (word->aux == quote_Open)
682 str = L"\x2018"; /* FIXME: configurability! */
683 else
684 str = L"\x2019"; /* FIXME: configurability! */
685 }
686
687 width = string_width(ctx->pdata->fonts[findex], str, &errs);
688
689 if (errs && word->alt)
690 return paper_width_list(vctx, word->alt, NULL, nspaces);
691 else
692 return ctx->pdata->sizes[findex] * width;
693 }
694
695 static int paper_width(void *vctx, word *word)
696 {
697 return paper_width_internal(vctx, word, NULL);
698 }
699
700 static int paper_width_simple(para_data *pdata, word *text)
701 {
702 struct paper_width_ctx ctx;
703
704 ctx.pdata = pdata;
705 ctx.minspacewidth =
706 (pdata->sizes[FONT_NORMAL] *
707 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
708
709 return paper_width_list(&ctx, text, NULL, NULL);
710 }
711
712 static void wrap_paragraph(para_data *pdata, word *words,
713 int w, int i1, int i2)
714 {
715 wrappedline *wrapping, *p;
716 int spacewidth;
717 struct paper_width_ctx ctx;
718 int line_height;
719
720 /*
721 * We're going to need to store the line height in every line
722 * structure we generate.
723 */
724 {
725 int i;
726 line_height = 0;
727 for (i = 0; i < NFONTS; i++)
728 if (line_height < pdata->sizes[i])
729 line_height = pdata->sizes[i];
730 line_height *= 4096;
731 }
732
733 spacewidth = (pdata->sizes[FONT_NORMAL] *
734 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
735 if (spacewidth == 0) {
736 /*
737 * A font without a space?! Disturbing. I hope this never
738 * comes up, but I'll make a random guess anyway and set my
739 * space width to half the point size.
740 */
741 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
742 }
743
744 /*
745 * I'm going to set the _minimum_ space width to 3/5 of the
746 * standard one, and use the standard one as the optimum.
747 */
748 ctx.minspacewidth = spacewidth * 3 / 5;
749 ctx.pdata = pdata;
750
751 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
752
753 /*
754 * Having done the wrapping, we now concoct a set of line_data
755 * structures.
756 */
757 pdata->first = pdata->last = NULL;
758
759 for (p = wrapping; p; p = p->next) {
760 line_data *ldata;
761 word *wd;
762 int len, wid, spaces;
763
764 ldata = mknew(line_data);
765
766 ldata->pdata = pdata;
767 ldata->first = p->begin;
768 ldata->end = p->end;
769 ldata->line_height = line_height;
770
771 ldata->xpos = (p == wrapping ? i1 : i2);
772
773 if (pdata->last) {
774 pdata->last->next = ldata;
775 ldata->prev = pdata->last;
776 } else {
777 pdata->first = ldata;
778 ldata->prev = NULL;
779 }
780 ldata->next = NULL;
781 pdata->last = ldata;
782
783 spaces = 0;
784 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
785 wid = (p == wrapping ? w - i1 : w - i2);
786 wd = ldata->first;
787
788 ldata->hshortfall = wid - len;
789 ldata->nspaces = spaces;
790 /*
791 * This tells us how much the space width needs to
792 * change from _min_spacewidth. But we want to store
793 * its difference from the _natural_ space width, to
794 * make the text rendering easier.
795 */
796 ldata->hshortfall += ctx.minspacewidth * spaces;
797 ldata->hshortfall -= spacewidth * spaces;
798 /*
799 * Special case: on the last line of a paragraph, we
800 * never stretch spaces.
801 */
802 if (ldata->hshortfall > 0 && !p->next)
803 ldata->hshortfall = 0;
804
805 ldata->aux_text = NULL;
806 ldata->aux_text_2 = NULL;
807 ldata->aux_left_indent = 0;
808 ldata->penalty_before = ldata->penalty_after = 0;
809 }
810
811 }
812
813 static page_data *page_breaks(line_data *first, line_data *last,
814 int page_height)
815 {
816 line_data *l, *m;
817 page_data *ph, *pt;
818
819 /*
820 * Page breaking is done by a close analogue of the optimal
821 * paragraph wrapping algorithm used by wrap_para(). We work
822 * backwards from the end of the document line by line; for
823 * each line, we contemplate every possible number of lines we
824 * could put on a page starting with that line, determine a
825 * cost function for each one, add it to the pre-computed cost
826 * function for optimally page-breaking everything after that
827 * page, and pick the best option.
828 *
829 * Since my line_data structures are only used for this
830 * purpose, I might as well just store the algorithm data
831 * directly in them.
832 */
833
834 for (l = last; l; l = l->prev) {
835 int minheight, text = 0, space = 0;
836 int cost;
837
838 l->bestcost = -1;
839 for (m = l; m; m = m->next) {
840 if (m != l && m->page_break)
841 break; /* we've gone as far as we can */
842
843 if (m != l)
844 space += m->prev->space_after;
845 if (m != l || m->page_break)
846 space += m->space_before;
847 text += m->line_height;
848 minheight = text + space;
849
850 if (m != l && minheight > page_height)
851 break;
852
853 /*
854 * Compute the cost of this arrangement, as the square
855 * of the amount of wasted space on the page.
856 * Exception: if this is the last page before a
857 * mandatory break or the document end, we don't
858 * penalise a large blank area.
859 */
860 if (m->next && !m->next->page_break)
861 {
862 int x = page_height - minheight;
863 int xf;
864
865 xf = x & 0xFF;
866 x >>= 8;
867
868 cost = x*x;
869 cost += (x * xf) >> 8;
870 } else
871 cost = 0;
872
873 if (m->next && !m->next->page_break) {
874 cost += m->penalty_after;
875 cost += m->next->penalty_before;
876 }
877
878 if (m->next && !m->next->page_break)
879 cost += m->next->bestcost;
880 if (l->bestcost == -1 || l->bestcost > cost) {
881 /*
882 * This is the best option yet for this starting
883 * point.
884 */
885 l->bestcost = cost;
886 if (m->next && !m->next->page_break)
887 l->vshortfall = page_height - minheight;
888 else
889 l->vshortfall = 0;
890 l->text = text;
891 l->space = space;
892 l->page_last = m;
893 }
894 }
895 }
896
897 /*
898 * Now go through the line list forwards and assemble the
899 * actual pages.
900 */
901 ph = pt = NULL;
902
903 l = first;
904 while (l) {
905 page_data *page;
906 int text, space;
907
908 page = mknew(page_data);
909 page->next = NULL;
910 page->prev = pt;
911 if (pt)
912 pt->next = page;
913 else
914 ph = page;
915 pt = page;
916
917 page->first_line = l;
918 page->last_line = l->page_last;
919
920 page->first_text = page->last_text = NULL;
921 page->first_xref = page->last_xref = NULL;
922 page->first_rect = page->last_rect = NULL;
923
924 /*
925 * Now assign a y-coordinate to each line on the page.
926 */
927 text = space = 0;
928 for (l = page->first_line; l; l = l->next) {
929 if (l != page->first_line)
930 space += l->prev->space_after;
931 if (l != page->first_line || l->page_break)
932 space += l->space_before;
933 text += l->line_height;
934
935 l->page = page;
936 l->ypos = text + space +
937 space * (float)page->first_line->vshortfall /
938 page->first_line->space;
939
940 if (l == page->last_line)
941 break;
942 }
943
944 l = page->last_line->next;
945 }
946
947 return ph;
948 }
949
950 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
951 {
952 rect *r = mknew(rect);
953
954 r->next = NULL;
955 if (page->last_rect)
956 page->last_rect->next = r;
957 else
958 page->first_rect = r;
959 page->last_rect = r;
960
961 r->x = x;
962 r->y = y;
963 r->w = w;
964 r->h = h;
965 }
966
967 static void add_string_to_page(page_data *page, int x, int y,
968 font_encoding *fe, int size, char *text)
969 {
970 text_fragment *frag;
971
972 frag = mknew(text_fragment);
973 frag->next = NULL;
974
975 if (page->last_text)
976 page->last_text->next = frag;
977 else
978 page->first_text = frag;
979 page->last_text = frag;
980
981 frag->x = x;
982 frag->y = y;
983 frag->fe = fe;
984 frag->fontsize = size;
985 frag->text = dupstr(text);
986 }
987
988 /*
989 * Returns the updated x coordinate.
990 */
991 static int render_string(page_data *page, font_data *font, int fontsize,
992 int x, int y, wchar_t *str)
993 {
994 char *text;
995 int textpos, textwid, glyph;
996 font_encoding *subfont = NULL, *sf;
997
998 text = mknewa(char, 1 + ustrlen(str));
999 textpos = textwid = 0;
1000
1001 while (*str) {
1002 glyph = font->bmp[*str];
1003
1004 if (glyph == 0xFFFF)
1005 continue; /* nothing more we can do here */
1006
1007 /*
1008 * Find which subfont this character is going in.
1009 */
1010 sf = font->subfont_map[glyph].subfont;
1011
1012 if (!sf) {
1013 int c;
1014
1015 /*
1016 * This character is not yet in a subfont. Assign one.
1017 */
1018 if (font->latest_subfont->free_pos >= 0x100)
1019 font->latest_subfont = new_font_encoding(font);
1020
1021 c = font->latest_subfont->free_pos++;
1022 if (font->latest_subfont->free_pos == 0x7F)
1023 font->latest_subfont->free_pos = 0xA1;
1024
1025 font->subfont_map[glyph].subfont = font->latest_subfont;
1026 font->subfont_map[glyph].position = c;
1027 font->latest_subfont->vector[c] = font->glyphs[glyph];
1028 font->latest_subfont->indices[c] = glyph;
1029 font->latest_subfont->to_unicode[c] = *str;
1030
1031 sf = font->latest_subfont;
1032 }
1033
1034 if (!subfont || sf != subfont) {
1035 if (subfont) {
1036 text[textpos] = '\0';
1037 add_string_to_page(page, x, y, subfont, fontsize, text);
1038 x += textwid;
1039 } else {
1040 assert(textpos == 0);
1041 }
1042 textpos = 0;
1043 subfont = sf;
1044 }
1045
1046 text[textpos++] = font->subfont_map[glyph].position;
1047 textwid += font->widths[glyph] * fontsize;
1048
1049 str++;
1050 }
1051
1052 if (textpos > 0) {
1053 text[textpos] = '\0';
1054 add_string_to_page(page, x, y, subfont, fontsize, text);
1055 x += textwid;
1056 }
1057
1058 return x;
1059 }
1060
1061 /*
1062 * Returns the updated x coordinate.
1063 */
1064 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
1065 int x, int y, word *text, word *text_end, xref **xr,
1066 int shortfall, int nspaces, int *nspace,
1067 keywordlist *keywords)
1068 {
1069 while (text && text != text_end) {
1070 int style, type, findex, errs;
1071 wchar_t *str;
1072 xref_dest dest;
1073
1074 switch (text->type) {
1075 /*
1076 * Start a cross-reference.
1077 */
1078 case word_HyperLink:
1079 case word_UpperXref:
1080 case word_LowerXref:
1081
1082 if (text->type == word_HyperLink) {
1083 dest.type = URL;
1084 dest.url = utoa_dup(text->text);
1085 dest.page = NULL;
1086 } else {
1087 keyword *kwl = kw_lookup(keywords, text->text);
1088 para_data *pdata;
1089
1090 if (kwl) {
1091 assert(kwl->para->private_data);
1092 pdata = (para_data *) kwl->para->private_data;
1093 dest.type = PAGE;
1094 dest.page = pdata->first->page;
1095 dest.url = NULL;
1096 } else {
1097 /*
1098 * Shouldn't happen, but *shrug*
1099 */
1100 dest.type = NONE;
1101 dest.page = NULL;
1102 dest.url = NULL;
1103 }
1104 }
1105 if (dest.type != NONE) {
1106 *xr = mknew(xref);
1107 (*xr)->dest = dest; /* structure copy */
1108 if (page->last_xref)
1109 page->last_xref->next = *xr;
1110 else
1111 page->first_xref = *xr;
1112 page->last_xref = *xr;
1113 (*xr)->next = NULL;
1114
1115 /*
1116 * FIXME: Ideally we should have, and use, some
1117 * vertical font metric information here so that
1118 * our cross-ref rectangle can take account of
1119 * descenders and the font's cap height. This will
1120 * do for the moment, but it isn't ideal.
1121 */
1122 (*xr)->lx = (*xr)->rx = x;
1123 (*xr)->by = y;
1124 (*xr)->ty = y + ldata->line_height;
1125 }
1126 goto nextword;
1127
1128 /*
1129 * Finish extending a cross-reference box.
1130 */
1131 case word_HyperEnd:
1132 case word_XrefEnd:
1133 *xr = NULL;
1134 goto nextword;
1135
1136 case word_IndexRef:
1137 goto nextword;
1138 /*
1139 * FIXME: we should do something with this.
1140 */
1141 }
1142
1143 style = towordstyle(text->type);
1144 type = removeattr(text->type);
1145
1146 findex = (style == word_Normal ? FONT_NORMAL :
1147 style == word_Emph ? FONT_EMPH :
1148 FONT_CODE);
1149
1150 if (type == word_Normal) {
1151 str = text->text;
1152 } else if (type == word_WhiteSpace) {
1153 x += pdata->sizes[findex] *
1154 string_width(pdata->fonts[findex], L" ", NULL);
1155 if (nspaces && findex != FONT_CODE) {
1156 x += (*nspace+1) * shortfall / nspaces;
1157 x -= *nspace * shortfall / nspaces;
1158 (*nspace)++;
1159 }
1160 goto nextword;
1161 } else /* if (type == word_Quote) */ {
1162 if (text->aux == quote_Open)
1163 str = L"\x2018"; /* FIXME: configurability! */
1164 else
1165 str = L"\x2019"; /* FIXME: configurability! */
1166 }
1167
1168 (void) string_width(pdata->fonts[findex], str, &errs);
1169
1170 if (errs && text->alt)
1171 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1172 xr, shortfall, nspaces, nspace, keywords);
1173 else
1174 x = render_string(page, pdata->fonts[findex],
1175 pdata->sizes[findex], x, y, str);
1176
1177 if (*xr)
1178 (*xr)->rx = x;
1179
1180 nextword:
1181 text = text->next;
1182 }
1183
1184 return x;
1185 }
1186
1187 static void render_line(line_data *ldata, int left_x, int top_y,
1188 xref_dest *dest, keywordlist *keywords)
1189 {
1190 int nspace;
1191 xref *xr;
1192
1193 if (ldata->aux_text) {
1194 int x;
1195 xr = NULL;
1196 nspace = 0;
1197 x = render_text(ldata->page, ldata->pdata, ldata,
1198 left_x + ldata->aux_left_indent,
1199 top_y - ldata->ypos,
1200 ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1201 if (ldata->aux_text_2)
1202 render_text(ldata->page, ldata->pdata, ldata,
1203 x, top_y - ldata->ypos,
1204 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1205 }
1206 nspace = 0;
1207
1208 if (ldata->first) {
1209 /*
1210 * There might be a cross-reference carried over from a
1211 * previous line.
1212 */
1213 if (dest->type != NONE) {
1214 xr = mknew(xref);
1215 xr->next = NULL;
1216 xr->dest = *dest; /* structure copy */
1217 if (ldata->page->last_xref)
1218 ldata->page->last_xref->next = xr;
1219 else
1220 ldata->page->first_xref = xr;
1221 ldata->page->last_xref = xr;
1222 xr->lx = xr->rx = left_x + ldata->xpos;
1223 xr->by = top_y - ldata->ypos;
1224 xr->ty = top_y - ldata->ypos + ldata->line_height;
1225 } else
1226 xr = NULL;
1227
1228 render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1229 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1230 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1231
1232 if (xr) {
1233 /*
1234 * There's a cross-reference continued on to the next line.
1235 */
1236 *dest = xr->dest;
1237 } else
1238 dest->type = NONE;
1239 }
1240 }
1241
1242 static void code_paragraph(para_data *pdata,
1243 font_data *fn, font_data *fi, font_data *fb,
1244 int font_size, int indent, word *words)
1245 {
1246 /*
1247 * For code paragraphs, I'm going to hack grievously and
1248 * pretend the three normal fonts are the three code paragraph
1249 * fonts.
1250 */
1251 pdata->fonts[FONT_NORMAL] = fb;
1252 pdata->fonts[FONT_EMPH] = fi;
1253 pdata->fonts[FONT_CODE] = fn;
1254 pdata->sizes[FONT_NORMAL] =
1255 pdata->sizes[FONT_EMPH] =
1256 pdata->sizes[FONT_CODE] = font_size;
1257
1258 pdata->first = pdata->last = NULL;
1259
1260 for (; words; words = words->next) {
1261 wchar_t *t, *e, *start;
1262 word *lhead = NULL, *ltail = NULL, *w;
1263 line_data *ldata;
1264 int prev = -1, curr;
1265
1266 t = words->text;
1267 if (words->next && words->next->type == word_Emph) {
1268 e = words->next->text;
1269 words = words->next;
1270 } else
1271 e = NULL;
1272
1273 start = t;
1274
1275 while (*start) {
1276 while (*t) {
1277 if (!e || !*e)
1278 curr = 0;
1279 else if (*e == L'i')
1280 curr = 1;
1281 else if (*e == L'b')
1282 curr = 2;
1283 else
1284 curr = 0;
1285
1286 if (prev < 0)
1287 prev = curr;
1288
1289 if (curr != prev)
1290 break;
1291
1292 t++;
1293 if (e && *e)
1294 e++;
1295 }
1296
1297 /*
1298 * We've isolated a maximal subsequence of the line
1299 * which has the same emphasis. Form it into a word
1300 * structure.
1301 */
1302 w = mknew(word);
1303 w->next = NULL;
1304 w->alt = NULL;
1305 w->type = (prev == 0 ? word_WeakCode :
1306 prev == 1 ? word_Emph : word_Normal);
1307 w->text = mknewa(wchar_t, t-start+1);
1308 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1309 w->text[t-start] = '\0';
1310 w->breaks = FALSE;
1311
1312 if (ltail)
1313 ltail->next = w;
1314 else
1315 lhead = w;
1316 ltail = w;
1317
1318 start = t;
1319 prev = -1;
1320 }
1321
1322 ldata = mknew(line_data);
1323
1324 ldata->pdata = pdata;
1325 ldata->first = lhead;
1326 ldata->end = NULL;
1327 ldata->line_height = font_size * 4096;
1328
1329 ldata->xpos = indent;
1330
1331 if (pdata->last) {
1332 pdata->last->next = ldata;
1333 ldata->prev = pdata->last;
1334 } else {
1335 pdata->first = ldata;
1336 ldata->prev = NULL;
1337 }
1338 ldata->next = NULL;
1339 pdata->last = ldata;
1340
1341 ldata->hshortfall = 0;
1342 ldata->nspaces = 0;
1343 ldata->aux_text = NULL;
1344 ldata->aux_text_2 = NULL;
1345 ldata->aux_left_indent = 0;
1346 /* General opprobrium for breaking in a code paragraph. */
1347 ldata->penalty_before = ldata->penalty_after = 50000;
1348 }
1349 }
1350
1351 static void rule_paragraph(para_data *pdata, int indent, int height)
1352 {
1353 line_data *ldata;
1354
1355 ldata = mknew(line_data);
1356
1357 ldata->pdata = pdata;
1358 ldata->first = NULL;
1359 ldata->end = NULL;
1360 ldata->line_height = height;
1361
1362 ldata->xpos = indent;
1363
1364 ldata->prev = NULL;
1365 ldata->next = NULL;
1366
1367 ldata->hshortfall = 0;
1368 ldata->nspaces = 0;
1369 ldata->aux_text = NULL;
1370 ldata->aux_text_2 = NULL;
1371 ldata->aux_left_indent = 0;
1372
1373 /*
1374 * Better to break after a rule than before it
1375 */
1376 ldata->penalty_after += 100000;
1377 ldata->penalty_before += -100000;
1378
1379 pdata->first = pdata->last = ldata;
1380 }