Implemented all the missing rendering features (such as different
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - set up contents section now we know what sections begin on
16 * which pages
17 *
18 * - do PDF outline
19 *
20 * - index
21 *
22 * That should bring us to the same level of functionality that
23 * original-Halibut had, and the same in PDF plus the obvious
24 * interactive navigation features. After that, in future work:
25 *
26 * - linearised PDF, perhaps?
27 *
28 * - I'm uncertain of whether I need to include a ToUnicode CMap
29 * in each of my font definitions in PDF. Currently things (by
30 * which I mean cut and paste out of acroread) seem to be
31 * working fairly happily without it, but I don't know.
32 *
33 * - configurability
34 *
35 * - title pages
36 */
37
38 #include <assert.h>
39 #include <stdio.h>
40
41 #include "halibut.h"
42 #include "paper.h"
43
44 static font_data *make_std_font(font_list *fontlist, char const *name);
45 static void wrap_paragraph(para_data *pdata, word *words,
46 int w, int i1, int i2);
47 static page_data *page_breaks(line_data *first, line_data *last,
48 int page_height);
49 static void render_line(line_data *ldata, int left_x, int top_y,
50 xref_dest *dest, keywordlist *keywords);
51 static int paper_width_simple(para_data *pdata, word *text);
52 static void code_paragraph(para_data *pdata,
53 font_data *fn, font_data *fi, font_data *fb,
54 int font_size, int indent, word *words);
55
56 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
57 indexdata *idx) {
58 paragraph *p;
59 document *doc;
60 int indent, extra_indent, firstline_indent, aux_indent;
61 para_data *pdata;
62 line_data *ldata, *firstline, *lastline;
63 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
64 page_data *pages;
65 font_list *fontlist;
66 word *aux, *aux2;
67
68 /*
69 * FIXME: All these things ought to become configurable.
70 */
71 int paper_width = 595 * 4096;
72 int paper_height = 841 * 4096;
73 int left_margin = 72 * 4096;
74 int top_margin = 72 * 4096;
75 int right_margin = 72 * 4096;
76 int bottom_margin = 108 * 4096;
77 int indent_list_bullet = 6 * 4096;
78 int indent_list = 24 * 4096;
79 int indent_quote = 18 * 4096;
80 int base_leading = 4096;
81 int base_para_spacing = 10 * 4096;
82 int chapter_top_space = 72 * 4096;
83 int sect_num_left_space = 12 * 4096;
84
85 int base_width = paper_width - left_margin - right_margin;
86 int page_height = paper_height - top_margin - bottom_margin;
87
88 IGNORE(keywords); /* FIXME */
89 IGNORE(idx); /* FIXME */
90
91 /*
92 * First, set up some font structures.
93 */
94 fontlist = mknew(font_list);
95 fontlist->head = fontlist->tail = NULL;
96 tr = make_std_font(fontlist, "Times-Roman");
97 ti = make_std_font(fontlist, "Times-Italic");
98 hr = make_std_font(fontlist, "Helvetica-Bold");
99 hi = make_std_font(fontlist, "Helvetica-BoldOblique");
100 cr = make_std_font(fontlist, "Courier");
101 co = make_std_font(fontlist, "Courier-Oblique");
102 cb = make_std_font(fontlist, "Courier-Bold");
103
104 /*
105 * Go through and break up each paragraph into lines.
106 */
107 indent = 0;
108 firstline = lastline = NULL;
109 for (p = sourceform; p; p = p->next) {
110 p->private_data = NULL;
111
112 switch (p->type) {
113 /*
114 * These paragraph types are either invisible or don't
115 * define text in the normal sense. Either way, they
116 * don't require wrapping.
117 */
118 case para_IM:
119 case para_BR:
120 case para_Rule:
121 case para_Biblio:
122 case para_NotParaType:
123 case para_Config:
124 case para_VersionID:
125 case para_NoCite:
126 break;
127
128 /*
129 * These paragraph types don't require wrapping, but
130 * they do affect the line width to which we wrap the
131 * rest of the paragraphs, so we need to pay attention.
132 */
133 case para_LcontPush:
134 indent += indent_list; break;
135 case para_LcontPop:
136 indent -= indent_list; assert(indent >= 0); break;
137 case para_QuotePush:
138 indent += indent_quote; break;
139 case para_QuotePop:
140 indent -= indent_quote; assert(indent >= 0); break;
141
142 /*
143 * This paragraph type is special. Process it
144 * specially.
145 */
146 case para_Code:
147 pdata = mknew(para_data);
148 code_paragraph(pdata, cr, co, cb, 12, indent, p->words);
149 p->private_data = pdata;
150 break;
151
152 /*
153 * All of these paragraph types require wrapping in the
154 * ordinary way. So we must supply a set of fonts, a
155 * line width and auxiliary information (e.g. bullet
156 * text) for each one.
157 */
158 case para_Chapter:
159 case para_Appendix:
160 case para_UnnumberedChapter:
161 case para_Heading:
162 case para_Subsect:
163 case para_Normal:
164 case para_BiblioCited:
165 case para_Bullet:
166 case para_NumberedList:
167 case para_DescribedThing:
168 case para_Description:
169 case para_Copyright:
170 case para_Title:
171 pdata = mknew(para_data);
172
173 /*
174 * Choose fonts for this paragraph.
175 *
176 * FIXME: All of this ought to be completely
177 * user-configurable.
178 */
179 switch (p->type) {
180 case para_Title:
181 pdata->fonts[FONT_NORMAL] = hr;
182 pdata->sizes[FONT_NORMAL] = 24;
183 pdata->fonts[FONT_EMPH] = hi;
184 pdata->sizes[FONT_EMPH] = 24;
185 pdata->fonts[FONT_CODE] = cb;
186 pdata->sizes[FONT_CODE] = 24;
187 break;
188
189 case para_Chapter:
190 case para_Appendix:
191 case para_UnnumberedChapter:
192 pdata->fonts[FONT_NORMAL] = hr;
193 pdata->sizes[FONT_NORMAL] = 20;
194 pdata->fonts[FONT_EMPH] = hi;
195 pdata->sizes[FONT_EMPH] = 20;
196 pdata->fonts[FONT_CODE] = cb;
197 pdata->sizes[FONT_CODE] = 20;
198 break;
199
200 case para_Heading:
201 case para_Subsect:
202 pdata->fonts[FONT_NORMAL] = hr;
203 pdata->fonts[FONT_EMPH] = hi;
204 pdata->fonts[FONT_CODE] = cb;
205 pdata->sizes[FONT_NORMAL] =
206 pdata->sizes[FONT_EMPH] =
207 pdata->sizes[FONT_CODE] =
208 (p->aux == 0 ? 16 : p->aux == 1 ? 14 : 13);
209 break;
210
211 case para_Normal:
212 case para_BiblioCited:
213 case para_Bullet:
214 case para_NumberedList:
215 case para_DescribedThing:
216 case para_Description:
217 case para_Copyright:
218 pdata->fonts[FONT_NORMAL] = tr;
219 pdata->sizes[FONT_NORMAL] = 12;
220 pdata->fonts[FONT_EMPH] = ti;
221 pdata->sizes[FONT_EMPH] = 12;
222 pdata->fonts[FONT_CODE] = cr;
223 pdata->sizes[FONT_CODE] = 12;
224 break;
225 }
226
227 /*
228 * Also select an indentation level depending on the
229 * paragraph type (list paragraphs other than
230 * para_DescribedThing need extra indent).
231 *
232 * (FIXME: Perhaps at some point we might even arrange
233 * for the user to be able to request indented first
234 * lines in paragraphs.)
235 */
236 if (p->type == para_Bullet ||
237 p->type == para_NumberedList ||
238 p->type == para_Description) {
239 extra_indent = firstline_indent = indent_list;
240 } else {
241 extra_indent = firstline_indent = 0;
242 }
243
244 /*
245 * Find the auxiliary text for this paragraph.
246 */
247 aux = aux2 = NULL;
248 aux_indent = 0;
249
250 switch (p->type) {
251 case para_Chapter:
252 case para_Appendix:
253 case para_Heading:
254 case para_Subsect:
255 /*
256 * For some heading styles (FIXME: be able to
257 * configure which), the auxiliary text contains
258 * the chapter number and is arranged to be
259 * right-aligned a few points left of the primary
260 * margin. For other styles, the auxiliary text is
261 * the full chapter _name_ and takes up space
262 * within the (wrapped) chapter title, meaning that
263 * we must move the first line indent over to make
264 * space for it.
265 */
266 if (p->type == para_Heading || p->type == para_Subsect) {
267 int len;
268
269 aux = p->kwtext2;
270 len = paper_width_simple(pdata, p->kwtext2);
271 aux_indent = -len - sect_num_left_space;
272 } else {
273 aux = p->kwtext;
274 aux2 = mknew(word);
275 aux2->next = NULL;
276 aux2->alt = NULL;
277 aux2->type = word_Normal;
278 aux2->text = ustrdup(L": ");
279 aux2->breaks = FALSE;
280 aux2->aux = 0;
281 aux_indent = 0;
282
283 firstline_indent += paper_width_simple(pdata, aux);
284 firstline_indent += paper_width_simple(pdata, aux2);
285 }
286 break;
287
288 case para_Bullet:
289 /*
290 * Auxiliary text consisting of a bullet. (FIXME:
291 * configurable bullet.)
292 */
293 aux = mknew(word);
294 aux->next = NULL;
295 aux->alt = NULL;
296 aux->type = word_Normal;
297 aux->text = ustrdup(L"\x2022");
298 aux->breaks = FALSE;
299 aux->aux = 0;
300 aux_indent = indent + indent_list_bullet;
301 break;
302
303 case para_NumberedList:
304 /*
305 * Auxiliary text consisting of the number followed
306 * by a (FIXME: configurable) full stop.
307 */
308 aux = p->kwtext;
309 aux2 = mknew(word);
310 aux2->next = NULL;
311 aux2->alt = NULL;
312 aux2->type = word_Normal;
313 aux2->text = ustrdup(L".");
314 aux2->breaks = FALSE;
315 aux2->aux = 0;
316 aux_indent = indent + indent_list_bullet;
317 break;
318
319 case para_BiblioCited:
320 /*
321 * Auxiliary text consisting of the bibliography
322 * reference text, and a trailing space.
323 */
324 aux = p->kwtext;
325 aux2 = mknew(word);
326 aux2->next = NULL;
327 aux2->alt = NULL;
328 aux2->type = word_Normal;
329 aux2->text = ustrdup(L" ");
330 aux2->breaks = FALSE;
331 aux2->aux = 0;
332 aux_indent = indent;
333 firstline_indent += paper_width_simple(pdata, aux);
334 firstline_indent += paper_width_simple(pdata, aux2);
335 break;
336 }
337
338 wrap_paragraph(pdata, p->words, base_width,
339 indent + firstline_indent,
340 indent + extra_indent);
341
342 p->private_data = pdata;
343
344 pdata->first->aux_text = aux;
345 pdata->first->aux_text_2 = aux2;
346 pdata->first->aux_left_indent = aux_indent;
347
348 break;
349 }
350
351 if (p->private_data) {
352 pdata = (para_data *)p->private_data;
353
354 /*
355 * Set the line spacing for each line in this paragraph.
356 */
357 for (ldata = pdata->first; ldata; ldata = ldata->next) {
358 if (ldata == pdata->first)
359 ldata->space_before = base_para_spacing / 2;
360 else
361 ldata->space_before = base_leading / 2;
362 if (ldata == pdata->last)
363 ldata->space_after = base_para_spacing / 2;
364 else
365 ldata->space_after = base_leading / 2;
366 ldata->page_break = FALSE;
367 }
368
369 /*
370 * Some kinds of section heading do require a page
371 * break before them.
372 */
373 if (p->type == para_Title ||
374 p->type == para_Chapter ||
375 p->type == para_Appendix ||
376 p->type == para_UnnumberedChapter) {
377 pdata->first->page_break = TRUE;
378 pdata->first->space_before = chapter_top_space;
379 }
380
381 /*
382 * Link all line structures together into a big list.
383 */
384 if (pdata->first) {
385 if (lastline) {
386 lastline->next = pdata->first;
387 pdata->first->prev = lastline;
388 } else {
389 firstline = pdata->first;
390 pdata->first->prev = NULL;
391 }
392 lastline = pdata->last;
393 }
394 }
395 }
396
397 /*
398 * Now we have an enormous linked list of every line of text in
399 * the document. Break it up into pages.
400 */
401 pages = page_breaks(firstline, lastline, page_height);
402
403 /*
404 * Now we're ready to actually lay out the pages. We do this by
405 * looping over _paragraphs_, since we may need to track cross-
406 * references between lines and even across pages.
407 */
408 for (p = sourceform; p; p = p->next) {
409 pdata = (para_data *)p->private_data;
410
411 if (pdata) {
412 xref_dest dest;
413 dest.type = NONE;
414 for (ldata = pdata->first; ldata; ldata = ldata->next) {
415 render_line(ldata, left_margin, paper_height - top_margin,
416 &dest, keywords);
417 if (ldata == pdata->last)
418 break;
419 }
420 }
421 }
422
423 doc = mknew(document);
424 doc->fonts = fontlist;
425 doc->pages = pages;
426 doc->paper_width = paper_width;
427 doc->paper_height = paper_height;
428 return doc;
429 }
430
431 static font_encoding *new_font_encoding(font_data *font)
432 {
433 font_encoding *fe;
434 int i;
435
436 fe = mknew(font_encoding);
437 fe->next = NULL;
438
439 if (font->list->tail)
440 font->list->tail->next = fe;
441 else
442 font->list->head = fe;
443 font->list->tail = fe;
444
445 fe->font = font;
446 fe->free_pos = 0x21;
447
448 for (i = 0; i < 256; i++) {
449 fe->vector[i] = NULL;
450 fe->indices[i] = -1;
451 fe->to_unicode[i] = 0xFFFF;
452 }
453
454 return fe;
455 }
456
457 static font_data *make_std_font(font_list *fontlist, char const *name)
458 {
459 const int *widths;
460 int nglyphs;
461 font_data *f;
462 font_encoding *fe;
463 int i;
464
465 widths = ps_std_font_widths(name);
466 if (!widths)
467 return NULL;
468
469 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
470
471 f = mknew(font_data);
472
473 f->list = fontlist;
474 f->name = name;
475 f->nglyphs = nglyphs;
476 f->glyphs = ps_std_glyphs;
477 f->widths = widths;
478 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
479
480 /*
481 * Our first subfont will contain all of US-ASCII. This isn't
482 * really necessary - we could just create custom subfonts
483 * precisely as the whim of render_string dictated - but
484 * instinct suggests that it might be nice to have the text in
485 * the output files look _marginally_ recognisable.
486 */
487 fe = new_font_encoding(f);
488 fe->free_pos = 0xA1; /* only the top half is free */
489 f->latest_subfont = fe;
490
491 for (i = 0; i < (int)lenof(f->bmp); i++)
492 f->bmp[i] = 0xFFFF;
493
494 for (i = 0; i < nglyphs; i++) {
495 wchar_t ucs;
496 ucs = ps_glyph_to_unicode(f->glyphs[i]);
497 assert(ucs != 0xFFFF);
498 f->bmp[ucs] = i;
499 if (ucs >= 0x20 && ucs <= 0x7E) {
500 fe->vector[ucs] = f->glyphs[i];
501 fe->indices[ucs] = i;
502 fe->to_unicode[ucs] = ucs;
503 f->subfont_map[i].subfont = fe;
504 f->subfont_map[i].position = ucs;
505 } else {
506 /*
507 * This character is not yet assigned to a subfont.
508 */
509 f->subfont_map[i].subfont = NULL;
510 f->subfont_map[i].position = 0;
511 }
512 }
513
514 return f;
515 }
516
517 static int string_width(font_data *font, wchar_t const *string, int *errs)
518 {
519 int width = 0;
520
521 if (errs)
522 *errs = 0;
523
524 for (; *string; string++) {
525 int index;
526
527 index = font->bmp[(unsigned short)*string];
528 if (index == 0xFFFF) {
529 if (errs)
530 *errs = 1;
531 } else {
532 width += font->widths[index];
533 }
534 }
535
536 return width;
537 }
538
539 static int paper_width_internal(void *vctx, word *word, int *nspaces);
540
541 struct paper_width_ctx {
542 int minspacewidth;
543 para_data *pdata;
544 };
545
546 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
547 int w = 0;
548 while (text && text != end) {
549 w += paper_width_internal(vctx, text, nspaces);
550 text = text->next;
551 }
552 return w;
553 }
554
555 static int paper_width_internal(void *vctx, word *word, int *nspaces)
556 {
557 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
558 int style, type, findex, width, errs;
559 wchar_t *str;
560
561 switch (word->type) {
562 case word_HyperLink:
563 case word_HyperEnd:
564 case word_UpperXref:
565 case word_LowerXref:
566 case word_XrefEnd:
567 case word_IndexRef:
568 return 0;
569 }
570
571 style = towordstyle(word->type);
572 type = removeattr(word->type);
573
574 findex = (style == word_Normal ? FONT_NORMAL :
575 style == word_Emph ? FONT_EMPH :
576 FONT_CODE);
577
578 if (type == word_Normal) {
579 str = word->text;
580 } else if (type == word_WhiteSpace) {
581 if (findex != FONT_CODE) {
582 if (nspaces)
583 (*nspaces)++;
584 return ctx->minspacewidth;
585 } else
586 str = L" ";
587 } else /* if (type == word_Quote) */ {
588 if (word->aux == quote_Open)
589 str = L"\x2018"; /* FIXME: configurability! */
590 else
591 str = L"\x2019"; /* FIXME: configurability! */
592 }
593
594 width = string_width(ctx->pdata->fonts[findex], str, &errs);
595
596 if (errs && word->alt)
597 return paper_width_list(vctx, word->alt, NULL, nspaces);
598 else
599 return ctx->pdata->sizes[findex] * width;
600 }
601
602 static int paper_width(void *vctx, word *word)
603 {
604 return paper_width_internal(vctx, word, NULL);
605 }
606
607 static int paper_width_simple(para_data *pdata, word *text)
608 {
609 struct paper_width_ctx ctx;
610
611 ctx.pdata = pdata;
612 ctx.minspacewidth =
613 (pdata->sizes[FONT_NORMAL] *
614 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
615
616 return paper_width_list(&ctx, text, NULL, NULL);
617 }
618
619 static void wrap_paragraph(para_data *pdata, word *words,
620 int w, int i1, int i2)
621 {
622 wrappedline *wrapping, *p;
623 int spacewidth;
624 struct paper_width_ctx ctx;
625 int line_height;
626
627 /*
628 * We're going to need to store the line height in every line
629 * structure we generate.
630 */
631 {
632 int i;
633 line_height = 0;
634 for (i = 0; i < NFONTS; i++)
635 if (line_height < pdata->sizes[i])
636 line_height = pdata->sizes[i];
637 line_height *= 4096;
638 }
639
640 spacewidth = (pdata->sizes[FONT_NORMAL] *
641 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
642 if (spacewidth == 0) {
643 /*
644 * A font without a space?! Disturbing. I hope this never
645 * comes up, but I'll make a random guess anyway and set my
646 * space width to half the point size.
647 */
648 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
649 }
650
651 /*
652 * I'm going to set the _minimum_ space width to 3/5 of the
653 * standard one, and use the standard one as the optimum.
654 */
655 ctx.minspacewidth = spacewidth * 3 / 5;
656 ctx.pdata = pdata;
657
658 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
659
660 /*
661 * Having done the wrapping, we now concoct a set of line_data
662 * structures.
663 */
664 pdata->first = pdata->last = NULL;
665
666 for (p = wrapping; p; p = p->next) {
667 line_data *ldata;
668 word *wd;
669 int len, wid, spaces;
670
671 ldata = mknew(line_data);
672
673 ldata->pdata = pdata;
674 ldata->first = p->begin;
675 ldata->end = p->end;
676 ldata->line_height = line_height;
677
678 ldata->xpos = (p == wrapping ? i1 : i2);
679
680 if (pdata->last) {
681 pdata->last->next = ldata;
682 ldata->prev = pdata->last;
683 } else {
684 pdata->first = ldata;
685 ldata->prev = NULL;
686 }
687 ldata->next = NULL;
688 pdata->last = ldata;
689
690 spaces = 0;
691 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
692 wid = (p == wrapping ? w - i1 : w - i2);
693 wd = ldata->first;
694
695 ldata->hshortfall = wid - len;
696 ldata->nspaces = spaces;
697 /*
698 * This tells us how much the space width needs to
699 * change from _min_spacewidth. But we want to store
700 * its difference from the _natural_ space width, to
701 * make the text rendering easier.
702 */
703 ldata->hshortfall += ctx.minspacewidth * spaces;
704 ldata->hshortfall -= spacewidth * spaces;
705 /*
706 * Special case: on the last line of a paragraph, we
707 * never stretch spaces.
708 */
709 if (ldata->hshortfall > 0 && !p->next)
710 ldata->hshortfall = 0;
711
712 ldata->aux_text = NULL;
713 ldata->aux_text_2 = NULL;
714 ldata->aux_left_indent = 0;
715 }
716
717 }
718
719 static page_data *page_breaks(line_data *first, line_data *last,
720 int page_height)
721 {
722 line_data *l, *m;
723 page_data *ph, *pt;
724
725 /*
726 * Page breaking is done by a close analogue of the optimal
727 * paragraph wrapping algorithm used by wrap_para(). We work
728 * backwards from the end of the document line by line; for
729 * each line, we contemplate every possible number of lines we
730 * could put on a page starting with that line, determine a
731 * cost function for each one, add it to the pre-computed cost
732 * function for optimally page-breaking everything after that
733 * page, and pick the best option.
734 *
735 * Since my line_data structures are only used for this
736 * purpose, I might as well just store the algorithm data
737 * directly in them.
738 */
739
740 for (l = last; l; l = l->prev) {
741 int minheight, text = 0, space = 0;
742 int cost;
743
744 l->bestcost = -1;
745 for (m = l; m; m = m->next) {
746 if (m != l && m->page_break)
747 break; /* we've gone as far as we can */
748
749 if (m != l)
750 space += m->prev->space_after;
751 if (m != l || m->page_break)
752 space += m->space_before;
753 text += m->line_height;
754 minheight = text + space;
755
756 if (m != l && minheight > page_height)
757 break;
758
759 /*
760 * Compute the cost of this arrangement, as the square
761 * of the amount of wasted space on the page.
762 * Exception: if this is the last page before a
763 * mandatory break or the document end, we don't
764 * penalise a large blank area.
765 */
766 if (m->next && !m->next->page_break)
767 {
768 int x = page_height - minheight;
769 int xf;
770
771 xf = x & 0xFF;
772 x >>= 8;
773
774 cost = x*x;
775 cost += (x * xf) >> 8;
776 } else
777 cost = 0;
778
779 /*
780 * FIXME: here I should introduce penalties for
781 * breaking in mid-paragraph, particularly very close
782 * to one end of a paragraph and particularly in code
783 * paragraphs.
784 */
785
786 if (m->next && !m->next->page_break)
787 cost += m->next->bestcost;
788
789 if (l->bestcost == -1 || l->bestcost > cost) {
790 /*
791 * This is the best option yet for this starting
792 * point.
793 */
794 l->bestcost = cost;
795 if (m->next && !m->next->page_break)
796 l->vshortfall = page_height - minheight;
797 else
798 l->vshortfall = 0;
799 l->text = text;
800 l->space = space;
801 l->page_last = m;
802 }
803 }
804 }
805
806 /*
807 * Now go through the line list forwards and assemble the
808 * actual pages.
809 */
810 ph = pt = NULL;
811
812 l = first;
813 while (l) {
814 page_data *page;
815 int text, space;
816
817 page = mknew(page_data);
818 page->next = NULL;
819 page->prev = pt;
820 if (pt)
821 pt->next = page;
822 else
823 ph = page;
824 pt = page;
825
826 page->first_line = l;
827 page->last_line = l->page_last;
828
829 page->first_text = page->last_text = NULL;
830
831 page->first_xref = page->last_xref = NULL;
832
833 /*
834 * Now assign a y-coordinate to each line on the page.
835 */
836 text = space = 0;
837 for (l = page->first_line; l; l = l->next) {
838 if (l != page->first_line)
839 space += l->prev->space_after;
840 if (l != page->first_line || l->page_break)
841 space += l->space_before;
842 text += l->line_height;
843
844 l->page = page;
845 l->ypos = text + space +
846 space * (float)page->first_line->vshortfall /
847 page->first_line->space;
848
849 if (l == page->last_line)
850 break;
851 }
852
853 l = page->last_line->next;
854 }
855
856 return ph;
857 }
858
859 static void add_string_to_page(page_data *page, int x, int y,
860 font_encoding *fe, int size, char *text)
861 {
862 text_fragment *frag;
863
864 frag = mknew(text_fragment);
865 frag->next = NULL;
866
867 if (page->last_text)
868 page->last_text->next = frag;
869 else
870 page->first_text = frag;
871 page->last_text = frag;
872
873 frag->x = x;
874 frag->y = y;
875 frag->fe = fe;
876 frag->fontsize = size;
877 frag->text = dupstr(text);
878 }
879
880 /*
881 * Returns the updated x coordinate.
882 */
883 static int render_string(page_data *page, font_data *font, int fontsize,
884 int x, int y, wchar_t *str)
885 {
886 char *text;
887 int textpos, textwid, glyph;
888 font_encoding *subfont = NULL, *sf;
889
890 text = mknewa(char, 1 + ustrlen(str));
891 textpos = textwid = 0;
892
893 while (*str) {
894 glyph = font->bmp[*str];
895
896 if (glyph == 0xFFFF)
897 continue; /* nothing more we can do here */
898
899 /*
900 * Find which subfont this character is going in.
901 */
902 sf = font->subfont_map[glyph].subfont;
903
904 if (!sf) {
905 int c;
906
907 /*
908 * This character is not yet in a subfont. Assign one.
909 */
910 if (font->latest_subfont->free_pos >= 0x100)
911 font->latest_subfont = new_font_encoding(font);
912
913 c = font->latest_subfont->free_pos++;
914 if (font->latest_subfont->free_pos == 0x7F)
915 font->latest_subfont->free_pos = 0xA1;
916
917 font->subfont_map[glyph].subfont = font->latest_subfont;
918 font->subfont_map[glyph].position = c;
919 font->latest_subfont->vector[c] = font->glyphs[glyph];
920 font->latest_subfont->indices[c] = glyph;
921 font->latest_subfont->to_unicode[c] = *str;
922
923 sf = font->latest_subfont;
924 }
925
926 if (!subfont || sf != subfont) {
927 if (subfont) {
928 text[textpos] = '\0';
929 add_string_to_page(page, x, y, subfont, fontsize, text);
930 x += textwid;
931 } else {
932 assert(textpos == 0);
933 }
934 textpos = 0;
935 subfont = sf;
936 }
937
938 text[textpos++] = font->subfont_map[glyph].position;
939 textwid += font->widths[glyph] * fontsize;
940
941 str++;
942 }
943
944 if (textpos > 0) {
945 text[textpos] = '\0';
946 add_string_to_page(page, x, y, subfont, fontsize, text);
947 x += textwid;
948 }
949
950 return x;
951 }
952
953 /*
954 * Returns the updated x coordinate.
955 */
956 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
957 int x, int y, word *text, word *text_end, xref **xr,
958 int shortfall, int nspaces, int *nspace,
959 keywordlist *keywords)
960 {
961 while (text && text != text_end) {
962 int style, type, findex, errs;
963 wchar_t *str;
964 xref_dest dest;
965
966 switch (text->type) {
967 /*
968 * Start a cross-reference.
969 */
970 case word_HyperLink:
971 case word_UpperXref:
972 case word_LowerXref:
973
974 if (text->type == word_HyperLink) {
975 dest.type = URL;
976 dest.url = utoa_dup(text->text);
977 dest.page = NULL;
978 } else {
979 keyword *kwl = kw_lookup(keywords, text->text);
980 para_data *pdata;
981
982 if (kwl) {
983 assert(kwl->para->private_data);
984 pdata = (para_data *) kwl->para->private_data;
985 dest.type = PAGE;
986 dest.page = pdata->first->page;
987 dest.url = NULL;
988 } else {
989 /*
990 * Shouldn't happen, but *shrug*
991 */
992 dest.type = NONE;
993 dest.page = NULL;
994 dest.url = NULL;
995 }
996 }
997 if (dest.type != NONE) {
998 *xr = mknew(xref);
999 (*xr)->dest = dest; /* structure copy */
1000 if (page->last_xref)
1001 page->last_xref->next = *xr;
1002 else
1003 page->first_xref = *xr;
1004 page->last_xref = *xr;
1005
1006 /*
1007 * FIXME: Ideally we should have, and use, some
1008 * vertical font metric information here so that
1009 * our cross-ref rectangle can take account of
1010 * descenders and the font's cap height. This will
1011 * do for the moment, but it isn't ideal.
1012 */
1013 (*xr)->lx = (*xr)->rx = x;
1014 (*xr)->by = y;
1015 (*xr)->ty = y + ldata->line_height;
1016 }
1017 goto nextword;
1018
1019 /*
1020 * Finish extending a cross-reference box.
1021 */
1022 case word_HyperEnd:
1023 case word_XrefEnd:
1024 *xr = NULL;
1025 goto nextword;
1026
1027 case word_IndexRef:
1028 goto nextword;
1029 /*
1030 * FIXME: we should do something with all of these!
1031 * Hyperlinks and xrefs have meaning in PDF, and this
1032 * is probably the right place to nail down the index
1033 * references too.
1034 */
1035 }
1036
1037 style = towordstyle(text->type);
1038 type = removeattr(text->type);
1039
1040 findex = (style == word_Normal ? FONT_NORMAL :
1041 style == word_Emph ? FONT_EMPH :
1042 FONT_CODE);
1043
1044 if (type == word_Normal) {
1045 str = text->text;
1046 } else if (type == word_WhiteSpace) {
1047 x += pdata->sizes[findex] *
1048 string_width(pdata->fonts[findex], L" ", NULL);
1049 if (nspaces && findex != FONT_CODE) {
1050 x += (*nspace+1) * shortfall / nspaces;
1051 x -= *nspace * shortfall / nspaces;
1052 (*nspace)++;
1053 }
1054 goto nextword;
1055 } else /* if (type == word_Quote) */ {
1056 if (text->aux == quote_Open)
1057 str = L"\x2018"; /* FIXME: configurability! */
1058 else
1059 str = L"\x2019"; /* FIXME: configurability! */
1060 }
1061
1062 (void) string_width(pdata->fonts[findex], str, &errs);
1063
1064 if (errs && text->alt)
1065 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1066 xr, shortfall, nspaces, nspace, keywords);
1067 else
1068 x = render_string(page, pdata->fonts[findex],
1069 pdata->sizes[findex], x, y, str);
1070
1071 if (*xr)
1072 (*xr)->rx = x;
1073
1074 nextword:
1075 text = text->next;
1076 }
1077
1078 return x;
1079 }
1080
1081 static void render_line(line_data *ldata, int left_x, int top_y,
1082 xref_dest *dest, keywordlist *keywords)
1083 {
1084 int nspace;
1085 xref *xr;
1086
1087 if (ldata->aux_text) {
1088 int x;
1089 xr = NULL;
1090 nspace = 0;
1091 x = render_text(ldata->page, ldata->pdata, ldata,
1092 left_x + ldata->aux_left_indent,
1093 top_y - ldata->ypos,
1094 ldata->aux_text, NULL, &xr, 0, 0, &nspace, keywords);
1095 if (ldata->aux_text_2)
1096 render_text(ldata->page, ldata->pdata, ldata,
1097 x, top_y - ldata->ypos,
1098 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, keywords);
1099 }
1100 nspace = 0;
1101
1102 /*
1103 * There might be a cross-reference carried over from a
1104 * previous line.
1105 */
1106 if (dest->type != NONE) {
1107 xr = mknew(xref);
1108 xr->dest = *dest; /* structure copy */
1109 if (ldata->page->last_xref)
1110 ldata->page->last_xref->next = xr;
1111 else
1112 ldata->page->first_xref = xr;
1113 ldata->page->last_xref = xr;
1114 xr->lx = xr->rx = left_x + ldata->xpos;
1115 xr->by = top_y - ldata->ypos;
1116 xr->ty = top_y - ldata->ypos + ldata->line_height;
1117 } else
1118 xr = NULL;
1119
1120 render_text(ldata->page, ldata->pdata, ldata, left_x + ldata->xpos,
1121 top_y - ldata->ypos, ldata->first, ldata->end, &xr,
1122 ldata->hshortfall, ldata->nspaces, &nspace, keywords);
1123
1124 if (xr) {
1125 /*
1126 * There's a cross-reference continued on to the next line.
1127 */
1128 *dest = xr->dest;
1129 } else
1130 dest->type = NONE;
1131 }
1132
1133 static void code_paragraph(para_data *pdata,
1134 font_data *fn, font_data *fi, font_data *fb,
1135 int font_size, int indent, word *words)
1136 {
1137 /*
1138 * For code paragraphs, I'm going to hack grievously and
1139 * pretend the three normal fonts are the three code paragraph
1140 * fonts.
1141 */
1142 pdata->fonts[FONT_NORMAL] = fb;
1143 pdata->fonts[FONT_EMPH] = fi;
1144 pdata->fonts[FONT_CODE] = fn;
1145 pdata->sizes[FONT_NORMAL] =
1146 pdata->sizes[FONT_EMPH] =
1147 pdata->sizes[FONT_CODE] = font_size;
1148
1149 pdata->first = pdata->last = NULL;
1150
1151 for (; words; words = words->next) {
1152 wchar_t *t, *e, *start;
1153 word *lhead = NULL, *ltail = NULL, *w;
1154 line_data *ldata;
1155 int prev = -1, curr;
1156
1157 t = words->text;
1158 if (words->next && words->next->type == word_Emph) {
1159 e = words->next->text;
1160 words = words->next;
1161 } else
1162 e = NULL;
1163
1164 start = t;
1165
1166 while (*start) {
1167 while (*t) {
1168 if (!e || !*e)
1169 curr = 0;
1170 else if (*e == L'i')
1171 curr = 1;
1172 else if (*e == L'b')
1173 curr = 2;
1174 else
1175 curr = 0;
1176
1177 if (prev < 0)
1178 prev = curr;
1179
1180 if (curr != prev)
1181 break;
1182
1183 t++;
1184 if (e && *e)
1185 e++;
1186 }
1187
1188 /*
1189 * We've isolated a maximal subsequence of the line
1190 * which has the same emphasis. Form it into a word
1191 * structure.
1192 */
1193 w = mknew(word);
1194 w->next = NULL;
1195 w->alt = NULL;
1196 w->type = (prev == 0 ? word_WeakCode :
1197 prev == 1 ? word_Emph : word_Normal);
1198 w->text = mknewa(wchar_t, t-start+1);
1199 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
1200 w->text[t-start] = '\0';
1201 w->breaks = FALSE;
1202
1203 if (ltail)
1204 ltail->next = w;
1205 else
1206 lhead = w;
1207 ltail = w;
1208
1209 start = t;
1210 prev = -1;
1211 }
1212
1213 ldata = mknew(line_data);
1214
1215 ldata->pdata = pdata;
1216 ldata->first = lhead;
1217 ldata->end = NULL;
1218 ldata->line_height = font_size * 4096;
1219
1220 ldata->xpos = indent;
1221
1222 if (pdata->last) {
1223 pdata->last->next = ldata;
1224 ldata->prev = pdata->last;
1225 } else {
1226 pdata->first = ldata;
1227 ldata->prev = NULL;
1228 }
1229 ldata->next = NULL;
1230 pdata->last = ldata;
1231
1232 ldata->hshortfall = 0;
1233 ldata->nspaces = 0;
1234 ldata->aux_text = NULL;
1235 ldata->aux_text_2 = NULL;
1236 ldata->aux_left_indent = 0;
1237
1238 }
1239 }