Fix the TODO comments up a bit.
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - tune the page breaking algorithm to impose penalties on
16 * various things
17 * * breaking in the middle of a code paragraph
18 * * breaking one line from the start or end of a paragraph
19 * * breaking immediately after a heading of any kind (or
20 * indeed within one)
21 * * we may also need to impose a limit on the amount by which
22 * we can _stretch_ a page; after a certain point we may
23 * prefer just to unapologetically leave space at the bottom.
24 *
25 * - implement some simple graphics
26 * * I had an underline below chapter headings in the original
27 * Perl version, and I thought it looked rather nice
28 * * also we need para_Rule.
29 *
30 * - set up contents section now we know what sections begin on
31 * which pages
32 *
33 * - do PDF outline
34 *
35 * - index
36 *
37 * - header/footer? Page numbers at least would be handy. Fully
38 * configurable footer can wait, though.
39 *
40 * That should bring us to the same level of functionality that
41 * original-Halibut had, and the same in PDF plus the obvious
42 * interactive navigation features. After that, in future work:
43 *
44 * - linearised PDF, perhaps?
45 *
46 * - I'm uncertain of whether I need to include a ToUnicode CMap
47 * in each of my font definitions in PDF. Currently things (by
48 * which I mean cut and paste out of acroread) seem to be
49 * working fairly happily without it, but I don't know.
50 *
51 * - configurability
52 *
53 * - title pages
54 */
55
56 #include <assert.h>
57 #include <stdio.h>
58
59 #include "halibut.h"
60 #include "paper.h"
61
62 static font_data *make_std_font(font_list *fontlist, char const *name);
63 static void wrap_paragraph(para_data *pdata, word *words,
64 int w, int i1, int i2);
65 static page_data *page_breaks(line_data *first, line_data *last,
66 int page_height);
67 static void render_line(line_data *ldata, int left_x, int top_y,
68 xref_dest *dest, keywordlist *keywords);
69 static int paper_width_simple(para_data *pdata, word *text);
70 static void code_paragraph(para_data *pdata,
71 font_data *fn, font_data *fi, font_data *fb,
72 int font_size, int indent, word *words);
73
74 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
75 indexdata *idx) {
76 paragraph *p;
77 document *doc;
78 int indent, extra_indent, firstline_indent, aux_indent;
79 para_data *pdata;
80 line_data *ldata, *firstline, *lastline;
81 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
82 page_data *pages;
83 font_list *fontlist;
84 word *aux, *aux2;
85
86 /*
87 * FIXME: All these things ought to become configurable.
88 */
89 int paper_width = 595 * 4096;
90 int paper_height = 841 * 4096;
91 int left_margin = 72 * 4096;
92 int top_margin = 72 * 4096;
93 int right_margin = 72 * 4096;
94 int bottom_margin = 108 * 4096;
95 int indent_list_bullet = 6 * 4096;
96 int indent_list = 24 * 4096;
97 int indent_quote = 18 * 4096;
98 int base_leading = 4096;
99 int base_para_spacing = 10 * 4096;
100 int chapter_top_space = 72 * 4096;
101 int sect_num_left_space = 12 * 4096;
102
103 int base_width = paper_width - left_margin - right_margin;
104 int page_height = paper_height - top_margin - bottom_margin;
105
106 IGNORE(idx); /* FIXME */
107
108 /*
109 * First, set up some font structures.
110 */
111 fontlist = mknew(font_list);
112 fontlist->head = fontlist->tail = NULL;
113 tr = make_std_font(fontlist, "Times-Roman");
114 ti = make_std_font(fontlist, "Times-Italic");
115 hr = make_std_font(fontlist, "Helvetica-Bold");
116 hi = make_std_font(fontlist, "Helvetica-BoldOblique");
117 cr = make_std_font(fontlist, "Courier");
118 co = make_std_font(fontlist, "Courier-Oblique");
119 cb = make_std_font(fontlist, "Courier-Bold");
120
121 /*
122 * Go through and break up each paragraph into lines.
123 */
124 indent = 0;
125 firstline = lastline = NULL;
126 for (p = sourceform; p; p = p->next) {
127 p->private_data = NULL;
128
129 switch (p->type) {
130 /*
131 * These paragraph types are either invisible or don't
132 * define text in the normal sense. Either way, they
133 * don't require wrapping.
134 */
135 case para_IM:
136 case para_BR:
137 case para_Rule:
138 case para_Biblio:
139 case para_NotParaType:
140 case para_Config:
141 case para_VersionID:
142 case para_NoCite:
143 break;
144
145 /*
146 * These paragraph types don't require wrapping, but
147 * they do affect the line width to which we wrap the
148 * rest of the paragraphs, so we need to pay attention.
149 */
150 case para_LcontPush:
151 indent += indent_list; break;
152 case para_LcontPop:
153 indent -= indent_list; assert(indent >= 0); break;
154 case para_QuotePush:
155 indent += indent_quote; break;
156 case para_QuotePop:
157 indent -= indent_quote; assert(indent >= 0); break;
158
159 /*
160 * This paragraph type is special. Process it
161 * specially.
162 */
163 case para_Code:
164 pdata = mknew(para_data);
165 code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
166 p->private_data = pdata;
167 break;
168
169 /*
170 * All of these paragraph types require wrapping in the
171 * ordinary way. So we must supply a set of fonts, a
172 * line width and auxiliary information (e.g. bullet
173 * text) for each one.
174 */
175 case para_Chapter:
176 case para_Appendix:
177 case para_UnnumberedChapter:
178 case para_Heading:
179 case para_Subsect:
180 case para_Normal:
181 case para_BiblioCited:
182 case para_Bullet:
183 case para_NumberedList:
184 case para_DescribedThing:
185 case para_Description:
186 case para_Copyright:
187 case para_Title:
188 pdata = mknew(para_data);
189
190 /*
191 * Choose fonts for this paragraph.
192 *
193 * FIXME: All of this ought to be completely
194 * user-configurable.
195 */
196 switch (p->type) {
197 case para_Title:
198 pdata->fonts[FONT_NORMAL] = hr;
199 pdata->sizes[FONT_NORMAL] = 24;
200 pdata->fonts[FONT_EMPH] = hi;
201 pdata->sizes[FONT_EMPH] = 24;
202 pdata->fonts[FONT_CODE] = cb;
203 pdata->sizes[FONT_CODE] = 24;
204 break;
205
206 case para_Chapter:
207 case para_Appendix:
208 case para_UnnumberedChapter:
209 pdata->fonts[FONT_NORMAL] = hr;
210 pdata->sizes[FONT_NORMAL] = 20;
211 pdata->fonts[FONT_EMPH] = hi;
212 pdata->sizes[FONT_EMPH] = 20;
213 pdata->fonts[FONT_CODE] = cb;
214 pdata->sizes[FONT_CODE] = 20;
215 break;
216
217 case para_Heading:
218 case para_Subsect:
219 pdata->fonts[FONT_NORMAL] = hr;
220 pdata->fonts[FONT_EMPH] = hi;
221 pdata->fonts[FONT_CODE] = cb;
222 pdata->sizes[FONT_NORMAL] =
223 pdata->sizes[FONT_EMPH] =
224 pdata->sizes[FONT_CODE] =
225 (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
226 break;
227
228 case para_Normal:
229 case para_BiblioCited:
230 case para_Bullet:
231 case para_NumberedList:
232 case para_DescribedThing:
233 case para_Description:
234 case para_Copyright:
235 pdata->fonts[FONT_NORMAL] = tr;
236 pdata->sizes[FONT_NORMAL] = 12;
237 pdata->fonts[FONT_EMPH] = ti;
238 pdata->sizes[FONT_EMPH] = 12;
239 pdata->fonts[FONT_CODE] = cr;
240 pdata->sizes[FONT_CODE] = 12;
241 break;
242 }
243
244 /*
245 * Also select an indentation level depending on the
246 * paragraph type (list paragraphs other than
247 * para_DescribedThing need extra indent).
248 *
249 * (FIXME: Perhaps at some point we might even arrange
250 * for the user to be able to request indented first
251 * lines in paragraphs.)
252 */
253 if (p->type == para_Bullet ||
254 p->type == para_NumberedList ||
255 p->type == para_Description) {
256 extra_indent = firstline_indent = indent_list;
257 } else {
258 extra_indent = firstline_indent = 0;
259 }
260
261 /*
262 * Find the auxiliary text for this paragraph.
263 */
264 aux = aux2 = NULL;
265 aux_indent = 0;
266
267 switch (p->type) {
268 case para_Chapter:
269 case para_Appendix:
270 case para_Heading:
271 case para_Subsect:
272 /*
273 * For some heading styles (FIXME: be able to
274 * configure which), the auxiliary text contains
275 * the chapter number and is arranged to be
276 * right-aligned a few points left of the primary
277 * margin. For other styles, the auxiliary text is
278 * the full chapter _name_ and takes up space
279 * within the (wrapped) chapter title, meaning that
280 * we must move the first line indent over to make
281 * space for it.
282 */
283 if (p->type == para_Heading || p->type == para_Subsect) {
284 int len;
285
286 aux = p->kwtext2;
287 len = paper_width_simple(pdata, p->kwtext2);
288 aux_indent = -len - sect_num_left_space;
289 } else {
290 aux = p->kwtext;
291 aux2 = mknew(word);
292 aux2->next = NULL;
293 aux2->alt = NULL;
294 aux2->type = word_Normal;
295 aux2->text = ustrdup(L": ");
296 aux2->breaks = FALSE;
297 aux2->aux = 0;
298 aux_indent = 0;
299
300 firstline_indent += paper_width_simple(pdata, aux);
301 firstline_indent += paper_width_simple(pdata, aux2);
302 }
303 break;
304
305 case para_Bullet:
306 /*
307 * Auxiliary text consisting of a bullet. (FIXME:
308 * configurable bullet.)
309 */
310 aux = mknew(word);
311 aux->next = NULL;
312 aux->alt = NULL;
313 aux->type = word_Normal;
314 aux->text = ustrdup(L"\x2022");
315 aux->breaks = FALSE;
316 aux->aux = 0;
317 aux_indent = indent + indent_list_bullet;
318 break;
319
320 case para_NumberedList:
321 /*
322 * Auxiliary text consisting of the number followed
323 * by a (FIXME: configurable) full stop.
324 */
325 aux = p->kwtext;
326 aux2 = mknew(word);
327 aux2->next = NULL;
328 aux2->alt = NULL;
329 aux2->type = word_Normal;
330 aux2->text = ustrdup(L".");
331 aux2->breaks = FALSE;
332 aux2->aux = 0;
333 aux_indent = indent + indent_list_bullet;
334 break;
335
336 case para_BiblioCited:
337 /*
338 * Auxiliary text consisting of the bibliography
339 * reference text, and a trailing space.
340 */
341 aux = p->kwtext;
342 aux2 = mknew(word);
343 aux2->next = NULL;
344 aux2->alt = NULL;
345 aux2->type = word_Normal;
346 aux2->text = ustrdup(L" ");
347 aux2->breaks = FALSE;
348 aux2->aux = 0;
349 aux_indent = indent;
350 firstline_indent += paper_width_simple(pdata, aux);
351 firstline_indent += paper_width_simple(pdata, aux2);
352 break;
353 }
354
355 wrap_paragraph(pdata, p->words, base_width,
356 indent + firstline_indent,
357 indent + extra_indent);
358
359 p->private_data = pdata;
360
361 pdata->first->aux_text = aux;
362 pdata->first->aux_text_2 = aux2;
363 pdata->first->aux_left_indent = aux_indent;
364
365 break;
366 }
367
368 if (p->private_data) {
369 pdata = (para_data *)p->private_data;
370
371 /*
372 * Set the line spacing for each line in this paragraph.
373 */
374 for (ldata = pdata->first; ldata; ldata = ldata->next) {
375 if (ldata == pdata->first)
376 ldata->space_before = base_para_spacing / 2;
377 else
378 ldata->space_before = base_leading / 2;
379 if (ldata == pdata->last)
380 ldata->space_after = base_para_spacing / 2;
381 else
382 ldata->space_after = base_leading / 2;
383 ldata->page_break = FALSE;
384 }
385
386 /*
387 * Some kinds of section heading do require a page
388 * break before them.
389 */
390 if (p->type == para_Title ||
391 p->type == para_Chapter ||
392 p->type == para_Appendix ||
393 p->type == para_UnnumberedChapter) {
394 pdata->first->page_break = TRUE;
395 pdata->first->space_before = chapter_top_space;
396 }
397
398 /*
399 * Link all line structures together into a big list.
400 */
401 if (pdata->first) {
402 if (lastline) {
403 lastline->next = pdata->first;
404 pdata->first->prev = lastline;
405 } else {
406 firstline = pdata->first;
407 pdata->first->prev = NULL;
408 }
409 lastline = pdata->last;
410 }
411 }
412 }
413
414 /*
415 * Now we have an enormous linked list of every line of text in
416 * the document. Break it up into pages.
417 */
418 pages = page_breaks(firstline, lastline, page_height);
419
420 /*
421 * Now we're ready to actually lay out the pages. We do this by
422 * looping over _paragraphs_, since we may need to track cross-
423 * references between lines and even across pages.
424 */
425 for (p = sourceform; p; p = p->next) {
426 pdata = (para_data *)p->private_data;
427
428 if (pdata) {
429 xref_dest dest;
430 dest.type = NONE;
431 for (ldata = pdata->first; ldata; ldata = ldata->next) {
432 render_line(ldata, left_margin, paper_height - top_margin,
433 &dest, keywords);
434 if (ldata == pdata->last)
435 break;
436 }
437 }
438 }
439
440 doc = mknew(document);
441 doc->fonts = fontlist;
442 doc->pages = pages;
443 doc->paper_width = paper_width;
444 doc->paper_height = paper_height;
445 return doc;
446 }
447
448 static font_encoding *new_font_encoding(font_data *font)
449 {
450 font_encoding *fe;
451 int i;
452
453 fe = mknew(font_encoding);
454 fe->next = NULL;
455
456 if (font->list->tail)
457 font->list->tail->next = fe;
458 else
459 font->list->head = fe;
460 font->list->tail = fe;
461
462 fe->font = font;
463 fe->free_pos = 0x21;
464
465 for (i = 0; i < 256; i++) {
466 fe->vector[i] = NULL;
467 fe->indices[i] = -1;
468 fe->to_unicode[i] = 0xFFFF;
469 }
470
471 return fe;
472 }
473
474 static font_data *make_std_font(font_list *fontlist, char const *name)
475 {
476 const int *widths;
477 int nglyphs;
478 font_data *f;
479 font_encoding *fe;
480 int i;
481
482 widths = ps_std_font_widths(name);
483 if (!widths)
484 return NULL;
485
486 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
487
488 f = mknew(font_data);
489
490 f->list = fontlist;
491 f->name = name;
492 f->nglyphs = nglyphs;
493 f->glyphs = ps_std_glyphs;
494 f->widths = widths;
495 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
496
497 /*
498 * Our first subfont will contain all of US-ASCII. This isn't
499 * really necessary - we could just create custom subfonts
500 * precisely as the whim of render_string dictated - but
501 * instinct suggests that it might be nice to have the text in
502 * the output files look _marginally_ recognisable.
503 */
504 fe = new_font_encoding(f);
505 fe->free_pos = 0xA1; /* only the top half is free */
506 f->latest_subfont = fe;
507
508 for (i = 0; i < (int)lenof(f->bmp); i++)
509 f->bmp[i] = 0xFFFF;
510
511 for (i = 0; i < nglyphs; i++) {
512 wchar_t ucs;
513 ucs = ps_glyph_to_unicode(f->glyphs[i]);
514 assert(ucs != 0xFFFF);
515 f->bmp[ucs] = i;
516 if (ucs >= 0x20 && ucs <= 0x7E) {
517 fe->vector[ucs] = f->glyphs[i];
518 fe->indices[ucs] = i;
519 fe->to_unicode[ucs] = ucs;
520 f->subfont_map[i].subfont = fe;
521 f->subfont_map[i].position = ucs;
522 } else {
523 /*
524 * This character is not yet assigned to a subfont.
525 */
526 f->subfont_map[i].subfont = NULL;
527 f->subfont_map[i].position = 0;
528 }
529 }
530
531 return f;
532 }
533
534 static int string_width(font_data *font, wchar_t const *string, int *errs)
535 {
536 int width = 0;
537
538 if (errs)
539 *errs = 0;
540
541 for (; *string; string++) {
542 int index;
543
544 index = font->bmp[(unsigned short)*string];
545 if (index == 0xFFFF) {
546 if (errs)
547 *errs = 1;
548 } else {
549 width += font->widths[index];
550 }
551 }
552
553 return width;
554 }
555
556 static int paper_width_internal(void *vctx, word *word, int *nspaces);
557
558 struct paper_width_ctx {
559 int minspacewidth;
560 para_data *pdata;
561 };
562
563 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
564 int w = 0;
565 while (text && text != end) {
566 w += paper_width_internal(vctx, text, nspaces);
567 text = text->next;
568 }
569 return w;
570 }
571
572 static int paper_width_internal(void *vctx, word *word, int *nspaces)
573 {
574 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
575 int style, type, findex, width, errs;
576 wchar_t *str;
577
578 switch (word->type) {
579 case word_HyperLink:
580 case word_HyperEnd:
581 case word_UpperXref:
582 case word_LowerXref:
583 case word_XrefEnd:
584 case word_IndexRef:
585 return 0;
586 }
587
588 style = towordstyle(word->type);
589 type = removeattr(word->type);
590
591 findex = (style == word_Normal ? FONT_NORMAL :
592 style == word_Emph ? FONT_EMPH :
593 FONT_CODE);
594
595 if (type == word_Normal) {
596 str = word->text;
597 } else if (type == word_WhiteSpace) {
598 if (findex != FONT_CODE) {
599 if (nspaces)
600 (*nspaces)++;
601 return ctx->minspacewidth;
602 } else
603 str = L" ";
604 } else /* if (type == word_Quote) */ {
605 if (word->aux == quote_Open)
606 str = L"\x2018"; /* FIXME: configurability! */
607 else
608 str = L"\x2019"; /* FIXME: configurability! */
609 }
610
611 width = string_width(ctx->pdata->fonts[findex], str, &errs);
612
613 if (errs && word->alt)
614 return paper_width_list(vctx, word->alt, NULL, nspaces);
615 else
616 return ctx->pdata->sizes[findex] * width;
617 }
618
619 static int paper_width(void *vctx, word *word)
620 {
621 return paper_width_internal(vctx, word, NULL);
622 }
623
624 static int paper_width_simple(para_data *pdata, word *text)
625 {
626 struct paper_width_ctx ctx;
627
628 ctx.pdata = pdata;
629 ctx.minspacewidth =
630 (pdata->sizes[FONT_NORMAL] *
631 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
632
633 return paper_width_list(&ctx, text, NULL, NULL);
634 }
635
636 static void wrap_paragraph(para_data *pdata, word *words,
637 int w, int i1, int i2)
638 {
639 wrappedline *wrapping, *p;
640 int spacewidth;
641 struct paper_width_ctx ctx;
642 int line_height;
643
644 /*
645 * We're going to need to store the line height in every line
646 * structure we generate.
647 */
648 {
649 int i;
650 line_height = 0;
651 for (i = 0; i < NFONTS; i++)
652 if (line_height < pdata->sizes[i])
653 line_height = pdata->sizes[i];
654 line_height *= 4096;
655 }
656
657 spacewidth = (pdata->sizes[FONT_NORMAL] *
658 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
659 if (spacewidth == 0) {
660 /*
661 * A font without a space?! Disturbing. I hope this never
662 * comes up, but I'll make a random guess anyway and set my
663 * space width to half the point size.
664 */
665 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
666 }
667
668 /*
669 * I'm going to set the _minimum_ space width to 3/5 of the
670 * standard one, and use the standard one as the optimum.
671 */
672 ctx.minspacewidth = spacewidth * 3 / 5;
673 ctx.pdata = pdata;
674
675 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
676
677 /*
678 * Having done the wrapping, we now concoct a set of line_data
679 * structures.
680 */
681 pdata->first = pdata->last = NULL;
682
683 for (p = wrapping; p; p = p->next) {
684 line_data *ldata;
685 word *wd;
686 int len, wid, spaces;
687
688 ldata = mknew(line_data);
689
690 ldata->pdata = pdata;
691 ldata->first = p->begin;
692 ldata->end = p->end;
693 ldata->line_height = line_height;
694
695 ldata->xpos = (p == wrapping ? i1 : i2);
696
697 if (pdata->last) {
698 pdata->last->next = ldata;
699 ldata->prev = pdata->last;
700 } else {
701 pdata->first = ldata;
702 ldata->prev = NULL;
703 }
704 ldata->next = NULL;
705 pdata->last = ldata;
706
707 spaces = 0;
708 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
709 wid = (p == wrapping ? w - i1 : w - i2);
710 wd = ldata->first;
711
712 ldata->hshortfall = wid - len;
713 ldata->nspaces = spaces;
714 /*
715 * This tells us how much the space width needs to
716 * change from _min_spacewidth. But we want to store
717 * its difference from the _natural_ space width, to
718 * make the text rendering easier.
719 */
720 ldata->hshortfall += ctx.minspacewidth * spaces;
721 ldata->hshortfall -= spacewidth * spaces;
722 /*
723 * Special case: on the last line of a paragraph, we
724 * never stretch spaces.
725 */
726 if (ldata->hshortfall > 0 && !p->next)
727 ldata->hshortfall = 0;
728
729 ldata->aux_text = NULL;
730 ldata->aux_text_2 = NULL;
731 ldata->aux_left_indent = 0;
732 }
733
734 }
735
736 static page_data *page_breaks(line_data *first, line_data *last,
737 int page_height)
738 {
739 line_data *l, *m;
740 page_data *ph, *pt;
741
742 /*
743 * Page breaking is done by a close analogue of the optimal
744 * paragraph wrapping algorithm used by wrap_para(). We work
745 * backwards from the end of the document line by line; for
746 * each line, we contemplate every possible number of lines we
747 * could put on a page starting with that line, determine a
748 * cost function for each one, add it to the pre-computed cost
749 * function for optimally page-breaking everything after that
750 * page, and pick the best option.
751 *
752 * Since my line_data structures are only used for this
753 * purpose, I might as well just store the algorithm data
754 * directly in them.
755 */
756
757 for (l = last; l; l = l->prev) {
758 int minheight, text = 0, space = 0;
759 int cost;
760
761 l->bestcost = -1;
762 for (m = l; m; m = m->next) {
763 if (m != l && m->page_break)
764 break; /* we've gone as far as we can */
765
766 if (m != l)
767 space += m->prev->space_after;
768 if (m != l || m->page_break)
769 space += m->space_before;
770 text += m->line_height;
771 minheight = text + space;
772
773 if (m != l && minheight > page_height)
774 break;
775
776 /*
777 * Compute the cost of this arrangement, as the square
778 * of the amount of wasted space on the page.
779 * Exception: if this is the last page before a
780 * mandatory break or the document end, we don't
781 * penalise a large blank area.
782 */
783 if (m->next && !m->next->page_break)
784 {
785 int x = page_height - minheight;
786 int xf;
787
788 xf = x & 0xFF;
789 x >>= 8;
790
791 cost = x*x;
792 cost += (x * xf) >> 8;
793 } else
794 cost = 0;
795
796 /*
797 * FIXME: here I should introduce penalties for
798 * breaking in mid-paragraph, particularly very close
799 * to one end of a paragraph and particularly in code
800 * paragraphs.
801 */
802
803 if (m->next && !m->next->page_break)
804 cost += m->next->bestcost;
805
806 if (l->bestcost == -1 || l->bestcost > cost) {
807 /*
808 * This is the best option yet for this starting
809 * point.
810 */
811 l->bestcost = cost;
812 if (m->next && !m->next->page_break)
813 l->vshortfall = page_height - minheight;
814 else
815 l->vshortfall = 0;
816 l->text = text;
817 l->space = space;
818 l->page_last = m;
819 }
820 }
821 }
822
823 /*
824 * Now go through the line list forwards and assemble the
825 * actual pages.
826 */
827 ph = pt = NULL;
828
829 l = first;
830 while (l) {
831 page_data *page;
832 int text, space;
833
834 page = mknew(page_data);
835 page->next = NULL;
836 page->prev = pt;
837 if (pt)
838 pt->next = page;
839 else
840 ph = page;
841 pt = page;
842
843 page->first_line = l;
844 page->last_line = l->page_last;
845
846 page->first_text = page->last_text = NULL;
847
848 page->first_xref = page->last_xref = NULL;
849
850 /*
851 * Now assign a y-coordinate to each line on the page.
852 */
853 text = space = 0;
854 for (l = page->first_line; l; l = l->next) {
855 if (l != page->first_line)
856 space += l->prev->space_after;
857 if (l != page->first_line || l->page_break)
858 space += l->space_before;
859 text += l->line_height;
860
861 l->page = page;
862 l->ypos = text + space +
863 space * (float)page->first_line->vshortfall /
864 page->first_line->space;
865
866 if (l == page->last_line)
867 break;
868 }
869
870 l = page->last_line->next;
871 }
872
873 return ph;
874 }
875
876 static void add_string_to_page(page_data *page, int x, int y,
877 font_encoding *fe, int size, char *text)
878 {
879 text_fragment *frag;
880
881 frag = mknew(text_fragment);
882 frag->next = NULL;
883
884 if (page->last_text)
885 page->last_text->next = frag;
886 else
887 page->first_text = frag;
888 page->last_text = frag;
889
890 frag->x = x;
891 frag->y = y;
892 frag->fe = fe;
893 frag->fontsize = size;
894 frag->text = dupstr(text);
895 }
896
897 /*
898 * Returns the updated x coordinate.
899 */
900 static int render_string(page_data *page, font_data *font, int fontsize,
901 int x, int y, wchar_t *str)
902 {
903 char *text;
904 int textpos, textwid, glyph;
905 font_encoding *subfont = NULL, *sf;
906
907 text = mknewa(char, 1 + ustrlen(str));
908 textpos = textwid = 0;
909
910 while (*str) {
911 glyph = font->bmp[*str];
912
913 if (glyph == 0xFFFF)
914 continue; /* nothing more we can do here */
915
916 /*
917 * Find which subfont this character is going in.
918 */
919 sf = font->subfont_map[glyph].subfont;
920
921 if (!sf) {
922 int c;
923
924 /*
925 * This character is not yet in a subfont. Assign one.
926 */
927 if (font->latest_subfont->free_pos >= 0x100)
928 font->latest_subfont = new_font_encoding(font);
929
930 c = font->latest_subfont->free_pos++;
931 if (font->latest_subfont->free_pos == 0x7F)
932 font->latest_subfont->free_pos = 0xA1;
933
934 font->subfont_map[glyph].subfont = font->latest_subfont;
935 font->subfont_map[glyph].position = c;
936 font->latest_subfont->vector[c] = font->glyphs[glyph];
937 font->latest_subfont->indices[c] = glyph;
938 font->latest_subfont->to_unicode[c] = *str;
939
940 sf = font->latest_subfont;
941 }
942
943 if (!subfont || sf != subfont) {
944 if (subfont) {
945 text[textpos] = '\0';
946 add_string_to_page(page, x, y, subfont, fontsize, text);
947 x += textwid;
948 } else {
949 assert(textpos == 0);
950 }
951 textpos = 0;
952 subfont = sf;
953 }
954
955 text[textpos++] = font->subfont_map[glyph].position;
956 textwid += font->widths[glyph] * fontsize;
957
958 str++;
959 }
960
961 if (textpos > 0) {
962 text[textpos] = '\0';
963 add_string_to_page(page, x, y, subfont, fontsize, text);
964 x += textwid;
965 }
966
967 return x;
968 }
969
970 /*
971 * Returns the updated x coordinate.
972 */
973 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
974 int x, int y, word *text, word *text_end, xref **xr,
975 int shortfall, int nspaces, int *nspace,
976 keywordlist *keywords)
977 {
978 while (text && text != text_end) {
979 int style, type, findex, errs;
980 wchar_t *str;
981 xref_dest dest;
982
983 switch (text->type) {
984 /*
985 * Start a cross-reference.
986 */
987 case word_HyperLink:
988 case word_UpperXref:
989 case word_LowerXref:
990
991 if (text->type == word_HyperLink) {
992 dest.type = URL;
993 dest.url = utoa_dup(text->text);
994 dest.page = NULL;
995 } else {
996 keyword *kwl = kw_lookup(keywords, text->text);
997 para_data *pdata;
998
999 if (kwl) {
1000 assert(kwl->para->private_data);
1001 pdata = (para_data *) kwl->para->private_data;
1002 dest.type = PAGE;
1003 dest.page = pdata->first->page;
1004 dest.url = NULL;
1005 } else {
1006 /*
1007 * Shouldn't happen, but *shrug*
1008 */
1009 dest.type = NONE;
1010 dest.page = NULL;
1011 dest.url = NULL;
1012 }
1013 }
1014 if (dest.type != NONE) {
1015 *xr = mknew(xref);
1016 (*xr)->dest = dest; /* structure copy */
1017 if (page->last_xref)
1018 page->last_xref->next = *xr;
1019 else
1020 page->first_xref = *xr;
1021 page->last_xref = *xr;
1022
1023 /*
1024 * FIXME: Ideally we should have, and use, some
1025 * vertical font metric information here so that
1026 * our cross-ref rectangle can take account of
1027 * descenders and the font's cap height. This will
1028 * do for the moment, but it isn't ideal.
1029 */
1030 (*xr)->lx = (*xr)->rx = x;
1031 (*xr)->by = y;
1032 (*xr)->ty = y + ldata->line_height;
1033 }
1034 goto nextword;
1035
1036 /*
1037 * Finish extending a cross-reference box.
1038 */
1039 case word_HyperEnd:
1040 case word_XrefEnd:
1041 *xr = NULL;
1042 goto nextword;
1043
1044 case word_IndexRef:
1045 goto nextword;
1046 /*
1047 * FIXME: we should do something with this.
1048 */
1049 }
1050
1051 style = towordstyle(text->type);
1052 type = removeattr(text->type);
1053
1054 findex = (style == word_Normal ? FONT_NORMAL :
1055 style == word_Emph ? FONT_EMPH :
1056 FONT_CODE);
1057
1058 if (type == word_Normal) {
1059 str = text->text;
1060 } else if (type == word_WhiteSpace) {
1061 x += pdata->sizes[findex] *
1062 string_width(pdata->fonts[findex], L" ", NULL);
1063 if (nspaces && findex != FONT_CODE) {
1064 x += (*nspace+1) * shortfall / nspaces;
1065 x -= *nspace * shortfall / nspaces;
1066 (*nspace)++;
1067 }
1068 goto nextword;
1069 } else /* if (type == word_Quote) */ {
1070 if (text->aux == quote_Open)
1071 str = L"\x2018"; /* FIXME: configurability! */
1072 else
1073 str = L"\x2019"; /* FIXME: configurability! */
1074 }
1075
1076 (void) string_width(pdata->fonts[findex], str, &errs);
1077
1078 if (errs && text->alt)
1079 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1080 xr, shortfall, nspaces, nspace, keywords);
1081 else
1082 x = render_string(page, pdata->fonts[findex],
1083 pdata->sizes[findex], x, y, str);
1084
1085 if (*xr)
1086 (*xr)->rx = x;
1087
1088 nextword:
1089 text = text->next;
1090 }
1091
1092 return x;
1093 }
1094
1095 static void render_line(line_data *ldata, int left_x, int top_y,
1096 xref_dest *dest, keywordlist *keywords)
1097 {
1098 int nspace;
1099 xref *xr;
1100
1101 if (ldata->aux_text) {
1102 int x;
1103 xr = NULL;
1104 nspace = 0;
1105 x = render_text(ldata->page, ldata->pdata, ldata,
1106 left_x + ldata->aux_left_indent,
1107 top_y - ldata->ypos,
1108 ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1109 if (ldata->aux_text_2)
1110 render_text(ldata->page, ldata->pdata, ldata,
1111 x, top_y - ldata->ypos,
1112 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1113 }
1114 nspace = 0;
1115
1116 /*
1117 * There might be a cross-reference carried over from a
1118 * previous line.
1119 */
1120 if (dest->type != NONE) {
1121 xr = mknew(xref);
1122 xr->dest = *dest; /* structure copy */
1123 if (ldata->page->last_xref)
1124 ldata->page->last_xref->next = xr;
1125 else
1126 ldata->page->first_xref = xr;
1127 ldata->page->last_xref = xr;
1128 xr->lx = xr->rx = left_x + ldata->xpos;
1129 xr->by = top_y - ldata->ypos;
1130 xr->ty = top_y - ldata->ypos + ldata->line_height;
1131 } else
1132 xr = NULL;
1133
1134 render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1135 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1136 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1137
1138 if (xr) {
1139 /*
1140 * There's a cross-reference continued on to the next line.
1141 */
1142 *dest = xr->dest;
1143 } else
1144 dest->type = NONE;
1145 }
1146
1147 static void code_paragraph(para_data *pdata,
1148 font_data *fn, font_data *fi, font_data *fb,
1149 int font_size, int indent, word *words)
1150 {
1151 /*
1152 * For code paragraphs, I'm going to hack grievously and
1153 * pretend the three normal fonts are the three code paragraph
1154 * fonts.
1155 */
1156 pdata->fonts[FONT_NORMAL] = fb;
1157 pdata->fonts[FONT_EMPH] = fi;
1158 pdata->fonts[FONT_CODE] = fn;
1159 pdata->sizes[FONT_NORMAL] =
1160 pdata->sizes[FONT_EMPH] =
1161 pdata->sizes[FONT_CODE] = font_size;
1162
1163 pdata->first = pdata->last = NULL;
1164
1165 for (; words; words = words->next) {
1166 wchar_t *t, *e, *start;
1167 word *lhead = NULL, *ltail = NULL, *w;
1168 line_data *ldata;
1169 int prev = -1, curr;
1170
1171 t = words->text;
1172 if (words->next && words->next->type == word_Emph) {
1173 e = words->next->text;
1174 words = words->next;
1175 } else
1176 e = NULL;
1177
1178 start = t;
1179
1180 while (*start) {
1181 while (*t) {
1182 if (!e || !*e)
1183 curr = 0;
1184 else if (*e == L'i')
1185 curr = 1;
1186 else if (*e == L'b')
1187 curr = 2;
1188 else
1189 curr = 0;
1190
1191 if (prev < 0)
1192 prev = curr;
1193
1194 if (curr != prev)
1195 break;
1196
1197 t++;
1198 if (e && *e)
1199 e++;
1200 }
1201
1202 /*
1203 * We've isolated a maximal subsequence of the line
1204 * which has the same emphasis. Form it into a word
1205 * structure.
1206 */
1207 w = mknew(word);
1208 w->next = NULL;
1209 w->alt = NULL;
1210 w->type = (prev == 0 ? word_WeakCode :
1211 prev == 1 ? word_Emph : word_Normal);
1212 w->text = mknewa(wchar_t, t-start+1);
1213 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1214 w->text[t-start] = '\0';
1215 w->breaks = FALSE;
1216
1217 if (ltail)
1218 ltail->next = w;
1219 else
1220 lhead = w;
1221 ltail = w;
1222
1223 start = t;
1224 prev = -1;
1225 }
1226
1227 ldata = mknew(line_data);
1228
1229 ldata->pdata = pdata;
1230 ldata->first = lhead;
1231 ldata->end = NULL;
1232 ldata->line_height = font_size * 4096;
1233
1234 ldata->xpos = indent;
1235
1236 if (pdata->last) {
1237 pdata->last->next = ldata;
1238 ldata->prev = pdata->last;
1239 } else {
1240 pdata->first = ldata;
1241 ldata->prev = NULL;
1242 }
1243 ldata->next = NULL;
1244 pdata->last = ldata;
1245
1246 ldata->hshortfall = 0;
1247 ldata->nspaces = 0;
1248 ldata->aux_text = NULL;
1249 ldata->aux_text_2 = NULL;
1250 ldata->aux_left_indent = 0;
1251
1252 }
1253 }