2 * Paper printing pre-backend for Halibut.
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
15 * - set up contents section now we know what sections begin on
20 * - header/footer? Page numbers at least would be handy. Fully
21 * configurable footer can wait, though.
23 * That should bring us to the same level of functionality that
24 * original-Halibut had, and the same in PDF plus the obvious
25 * interactive navigation features. After that, in future work:
27 * - linearised PDF, perhaps?
29 * - I'm uncertain of whether I need to include a ToUnicode CMap
30 * in each of my font definitions in PDF. Currently things (by
31 * which I mean cut and paste out of acroread) seem to be
32 * working fairly happily without it, but I don't know.
45 typedef struct paper_conf_Tag paper_conf
;
47 struct paper_conf_Tag
{
54 int indent_list_bullet
;
58 int base_para_spacing
;
59 int chapter_top_space
;
60 int sect_num_left_space
;
61 int chapter_underline_depth
;
62 int chapter_underline_thickness
;
65 int contents_indent_step
;
67 int leader_separation
;
68 /* These are derived from the above */
71 /* Fonts used in the configuration */
72 font_data
*tr
, *ti
, *hr
, *hi
, *cr
, *co
, *cb
;
75 static font_data
*make_std_font(font_list
*fontlist
, char const *name
);
76 static void wrap_paragraph(para_data
*pdata
, word
*words
,
77 int w
, int i1
, int i2
);
78 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
80 static int render_string(page_data
*page
, font_data
*font
, int fontsize
,
81 int x
, int y
, wchar_t *str
);
82 static int render_line(line_data
*ldata
, int left_x
, int top_y
,
83 xref_dest
*dest
, keywordlist
*keywords
);
84 static int paper_width_simple(para_data
*pdata
, word
*text
);
85 static para_data
*code_paragraph(int indent
, word
*words
, paper_conf
*conf
);
86 static para_data
*rule_paragraph(int indent
, paper_conf
*conf
);
87 static void add_rect_to_page(page_data
*page
, int x
, int y
, int w
, int h
);
88 static para_data
*make_para_data(int ptype
, int paux
, int indent
, int rmargin
,
89 word
*pkwtext
, word
*pkwtext2
, word
*pwords
,
91 static void standard_line_spacing(para_data
*pdata
, paper_conf
*conf
);
92 static wchar_t *prepare_outline_title(word
*first
, wchar_t *separator
,
94 static word
*fake_word(wchar_t *text
);
95 static word
*prepare_contents_title(word
*first
, wchar_t *separator
,
98 void *paper_pre_backend(paragraph
*sourceform
, keywordlist
*keywords
,
102 int indent
, used_contents
;
103 para_data
*pdata
, *firstpara
= NULL
, *lastpara
= NULL
;
104 para_data
*firstcont
, *lastcont
;
105 line_data
*ldata
, *firstline
, *lastline
, *firstcontline
, *lastcontline
;
111 * FIXME: All these things ought to become configurable.
113 conf
= mknew(paper_conf
);
114 conf
->paper_width
= 595 * 4096;
115 conf
->paper_height
= 841 * 4096;
116 conf
->left_margin
= 72 * 4096;
117 conf
->top_margin
= 72 * 4096;
118 conf
->right_margin
= 72 * 4096;
119 conf
->bottom_margin
= 108 * 4096;
120 conf
->indent_list_bullet
= 6 * 4096;
121 conf
->indent_list
= 24 * 4096;
122 conf
->indent_quote
= 18 * 4096;
123 conf
->base_leading
= 4096;
124 conf
->base_para_spacing
= 10 * 4096;
125 conf
->chapter_top_space
= 72 * 4096;
126 conf
->sect_num_left_space
= 12 * 4096;
127 conf
->chapter_underline_depth
= 14 * 4096;
128 conf
->chapter_underline_thickness
= 3 * 4096;
129 conf
->rule_thickness
= 1 * 4096;
130 conf
->base_font_size
= 12;
131 conf
->contents_indent_step
= 24 * 4096;
132 conf
->contents_margin
= 84 * 4096;
133 conf
->leader_separation
= 12 * 4096;
136 conf
->paper_width
- conf
->left_margin
- conf
->right_margin
;
138 conf
->paper_height
- conf
->top_margin
- conf
->bottom_margin
;
140 IGNORE(idx
); /* FIXME */
143 * First, set up some font structures.
145 fontlist
= mknew(font_list
);
146 fontlist
->head
= fontlist
->tail
= NULL
;
147 conf
->tr
= make_std_font(fontlist
, "Times-Roman");
148 conf
->ti
= make_std_font(fontlist
, "Times-Italic");
149 conf
->hr
= make_std_font(fontlist
, "Helvetica-Bold");
150 conf
->hi
= make_std_font(fontlist
, "Helvetica-BoldOblique");
151 conf
->cr
= make_std_font(fontlist
, "Courier");
152 conf
->co
= make_std_font(fontlist
, "Courier-Oblique");
153 conf
->cb
= make_std_font(fontlist
, "Courier-Bold");
156 * Format the contents entry for each heading.
159 word
*contents_title
;
160 contents_title
= fake_word(L
"Contents");
162 firstcont
= make_para_data(para_UnnumberedChapter
, 0, 0, 0,
163 NULL
, NULL
, contents_title
, conf
);
164 lastcont
= firstcont
;
165 lastcont
->next
= NULL
;
166 firstcontline
= firstcont
->first
;
167 lastcontline
= lastcont
->last
;
168 for (p
= sourceform
; p
; p
= p
->next
) {
175 case para_UnnumberedChapter
:
181 words
= prepare_contents_title(p
->kwtext
, L
": ", p
->words
);
184 case para_UnnumberedChapter
:
185 words
= prepare_contents_title(NULL
, NULL
, p
->words
);
190 words
= prepare_contents_title(p
->kwtext2
, L
" ", p
->words
);
191 indent
= (p
->aux
+ 1) * conf
->contents_indent_step
;
194 pdata
= make_para_data(para_Normal
, p
->aux
, indent
,
195 conf
->contents_margin
,
196 NULL
, NULL
, words
, conf
);
198 pdata
->contents_entry
= p
;
199 lastcont
->next
= pdata
;
203 * Link all contents line structures together into
208 lastcontline
->next
= pdata
->first
;
209 pdata
->first
->prev
= lastcontline
;
211 firstcontline
= pdata
->first
;
212 pdata
->first
->prev
= NULL
;
214 lastcontline
= pdata
->last
;
215 lastcontline
->next
= NULL
;
224 * Do the main paragraph formatting.
227 used_contents
= FALSE
;
228 firstline
= lastline
= NULL
;
229 for (p
= sourceform
; p
; p
= p
->next
) {
230 p
->private_data
= NULL
;
234 * These paragraph types are either invisible or don't
235 * define text in the normal sense. Either way, they
236 * don't require wrapping.
241 case para_NotParaType
:
248 * These paragraph types don't require wrapping, but
249 * they do affect the line width to which we wrap the
250 * rest of the paragraphs, so we need to pay attention.
253 indent
+= conf
->indent_list
; break;
255 indent
-= conf
->indent_list
; assert(indent
>= 0); break;
257 indent
+= conf
->indent_quote
; break;
259 indent
-= conf
->indent_quote
; assert(indent
>= 0); break;
262 * This paragraph type is special. Process it
266 pdata
= code_paragraph(indent
, p
->words
, conf
);
267 p
->private_data
= pdata
;
268 if (pdata
->first
!= pdata
->last
) {
269 pdata
->first
->penalty_after
+= 100000;
270 pdata
->last
->penalty_before
+= 100000;
275 * This paragraph is also special.
278 pdata
= rule_paragraph(indent
, conf
);
279 p
->private_data
= pdata
;
283 * All of these paragraph types require wrapping in the
284 * ordinary way. So we must supply a set of fonts, a
285 * line width and auxiliary information (e.g. bullet
286 * text) for each one.
290 case para_UnnumberedChapter
:
294 case para_BiblioCited
:
296 case para_NumberedList
:
297 case para_DescribedThing
:
298 case para_Description
:
301 pdata
= make_para_data(p
->type
, p
->aux
, indent
, 0,
302 p
->kwtext
, p
->kwtext2
, p
->words
, conf
);
304 p
->private_data
= pdata
;
309 if (p
->private_data
) {
310 pdata
= (para_data
*)p
->private_data
;
313 * If this is the first non-title heading, we link the
314 * contents section in before it.
316 if (!used_contents
&& pdata
->outline_level
> 0) {
317 used_contents
= TRUE
;
319 lastpara
->next
= firstcont
;
321 firstpara
= firstcont
;
323 assert(lastpara
->next
== NULL
);
326 lastline
->next
= firstcontline
;
327 firstcontline
->prev
= lastline
;
329 firstline
= firstcontline
;
330 firstcontline
->prev
= NULL
;
332 assert(lastcontline
!= NULL
);
333 lastline
= lastcontline
;
334 lastline
->next
= NULL
;
338 * Link all line structures together into a big list.
342 lastline
->next
= pdata
->first
;
343 pdata
->first
->prev
= lastline
;
345 firstline
= pdata
->first
;
346 pdata
->first
->prev
= NULL
;
348 lastline
= pdata
->last
;
349 lastline
->next
= NULL
;
353 * Link all paragraph structures together similarly.
357 lastpara
->next
= pdata
;
365 * Now we have an enormous linked list of every line of text in
366 * the document. Break it up into pages.
368 pages
= page_breaks(firstline
, lastline
, conf
->page_height
);
376 for (page
= pages
; page
; page
= page
->next
) {
378 sprintf(buf
, "%d", ++num
);
379 page
->number
= ufroma_dup(buf
);
384 * Now we're ready to actually lay out the pages. We do this by
385 * looping over _paragraphs_, since we may need to track cross-
386 * references between lines and even across pages.
388 for (pdata
= firstpara
; pdata
; pdata
= pdata
->next
) {
392 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
393 last_x
= render_line(ldata
, conf
->left_margin
,
394 conf
->paper_height
- conf
->top_margin
,
396 if (ldata
== pdata
->last
)
401 * If this is a contents entry, add leaders and a page
404 if (pdata
->contents_entry
) {
411 assert(pdata
->contents_entry
->private_data
);
412 target
= (para_data
*)pdata
->contents_entry
->private_data
;
413 num
= target
->first
->page
->number
;
416 wid
= paper_width_simple(pdata
, w
);
419 render_string(pdata
->last
->page
,
420 pdata
->fonts
[FONT_NORMAL
],
421 pdata
->sizes
[FONT_NORMAL
],
422 conf
->paper_width
- conf
->right_margin
- wid
,
423 (conf
->paper_height
- conf
->top_margin
-
424 pdata
->last
->ypos
), num
);
426 for (x
= 0; x
< conf
->base_width
; x
+= conf
->leader_separation
)
427 if (x
- conf
->leader_separation
> last_x
- conf
->left_margin
&&
428 x
+ conf
->leader_separation
< conf
->base_width
- wid
)
429 render_string(pdata
->last
->page
,
430 pdata
->fonts
[FONT_NORMAL
],
431 pdata
->sizes
[FONT_NORMAL
],
432 conf
->left_margin
+ x
,
433 (conf
->paper_height
- conf
->top_margin
-
434 pdata
->last
->ypos
), L
".");
438 * Render any rectangle (chapter title underline or rule)
439 * that goes with this paragraph.
441 switch (pdata
->rect_type
) {
442 case RECT_CHAPTER_UNDERLINE
:
443 add_rect_to_page(pdata
->last
->page
,
445 (conf
->paper_height
- conf
->top_margin
-
447 conf
->chapter_underline_depth
),
449 conf
->chapter_underline_thickness
);
452 add_rect_to_page(pdata
->first
->page
,
453 conf
->left_margin
+ pdata
->first
->xpos
,
454 (conf
->paper_height
- conf
->top_margin
-
456 pdata
->last
->line_height
),
457 conf
->base_width
- pdata
->first
->xpos
,
458 pdata
->last
->line_height
);
460 default: /* placate gcc */
466 * Start putting together the overall document structure we're
469 doc
= mknew(document
);
470 doc
->fonts
= fontlist
;
472 doc
->paper_width
= conf
->paper_width
;
473 doc
->paper_height
= conf
->paper_height
;
476 * Collect the section heading paragraphs into a document
477 * outline. This is slightly fiddly because the Title paragraph
478 * isn't required to be at the start, although all the others
484 doc
->outline_elements
= mknewa(outline_element
, osize
);
485 doc
->n_outline_elements
= 0;
487 /* First find the title. */
488 for (pdata
= firstpara
; pdata
; pdata
= pdata
->next
) {
489 if (pdata
->outline_level
== 0) {
490 doc
->outline_elements
[0].level
= 0;
491 doc
->outline_elements
[0].pdata
= pdata
;
492 doc
->n_outline_elements
++;
497 /* Then collect the rest. */
498 for (pdata
= firstpara
; pdata
; pdata
= pdata
->next
) {
499 if (pdata
->outline_level
> 0) {
500 if (doc
->n_outline_elements
>= osize
) {
502 doc
->outline_elements
=
503 resize(doc
->outline_elements
, osize
);
506 doc
->outline_elements
[doc
->n_outline_elements
].level
=
507 pdata
->outline_level
;
508 doc
->outline_elements
[doc
->n_outline_elements
].pdata
= pdata
;
509 doc
->n_outline_elements
++;
519 static para_data
*make_para_data(int ptype
, int paux
, int indent
, int rmargin
,
520 word
*pkwtext
, word
*pkwtext2
, word
*pwords
,
525 int extra_indent
, firstline_indent
, aux_indent
;
528 pdata
= mknew(para_data
);
529 pdata
->outline_level
= -1;
530 pdata
->outline_title
= NULL
;
531 pdata
->rect_type
= RECT_NONE
;
532 pdata
->contents_entry
= NULL
;
535 * Choose fonts for this paragraph.
537 * FIXME: All of this ought to be completely
542 pdata
->fonts
[FONT_NORMAL
] = conf
->hr
;
543 pdata
->sizes
[FONT_NORMAL
] = 24;
544 pdata
->fonts
[FONT_EMPH
] = conf
->hi
;
545 pdata
->sizes
[FONT_EMPH
] = 24;
546 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
547 pdata
->sizes
[FONT_CODE
] = 24;
548 pdata
->outline_level
= 0;
553 case para_UnnumberedChapter
:
554 pdata
->fonts
[FONT_NORMAL
] = conf
->hr
;
555 pdata
->sizes
[FONT_NORMAL
] = 20;
556 pdata
->fonts
[FONT_EMPH
] = conf
->hi
;
557 pdata
->sizes
[FONT_EMPH
] = 20;
558 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
559 pdata
->sizes
[FONT_CODE
] = 20;
560 pdata
->outline_level
= 1;
565 pdata
->fonts
[FONT_NORMAL
] = conf
->hr
;
566 pdata
->fonts
[FONT_EMPH
] = conf
->hi
;
567 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
568 pdata
->sizes
[FONT_NORMAL
] =
569 pdata
->sizes
[FONT_EMPH
] =
570 pdata
->sizes
[FONT_CODE
] =
571 (paux
== 0 ?
16 : paux
== 1 ?
14 : 13);
572 pdata
->outline_level
= 2 + paux
;
576 case para_BiblioCited
:
578 case para_NumberedList
:
579 case para_DescribedThing
:
580 case para_Description
:
582 pdata
->fonts
[FONT_NORMAL
] = conf
->tr
;
583 pdata
->sizes
[FONT_NORMAL
] = 12;
584 pdata
->fonts
[FONT_EMPH
] = conf
->ti
;
585 pdata
->sizes
[FONT_EMPH
] = 12;
586 pdata
->fonts
[FONT_CODE
] = conf
->cr
;
587 pdata
->sizes
[FONT_CODE
] = 12;
592 * Also select an indentation level depending on the
593 * paragraph type (list paragraphs other than
594 * para_DescribedThing need extra indent).
596 * (FIXME: Perhaps at some point we might even arrange
597 * for the user to be able to request indented first
598 * lines in paragraphs.)
600 if (ptype
== para_Bullet
||
601 ptype
== para_NumberedList
||
602 ptype
== para_Description
) {
603 extra_indent
= firstline_indent
= conf
->indent_list
;
605 extra_indent
= firstline_indent
= 0;
609 * Find the auxiliary text for this paragraph.
620 * For some heading styles (FIXME: be able to
621 * configure which), the auxiliary text contains
622 * the chapter number and is arranged to be
623 * right-aligned a few points left of the primary
624 * margin. For other styles, the auxiliary text is
625 * the full chapter _name_ and takes up space
626 * within the (wrapped) chapter title, meaning that
627 * we must move the first line indent over to make
630 if (ptype
== para_Heading
|| ptype
== para_Subsect
) {
634 len
= paper_width_simple(pdata
, pkwtext2
);
635 aux_indent
= -len
- conf
->sect_num_left_space
;
637 pdata
->outline_title
=
638 prepare_outline_title(pkwtext2
, L
" ", pwords
);
641 aux2
= fake_word(L
": ");
644 firstline_indent
+= paper_width_simple(pdata
, aux
);
645 firstline_indent
+= paper_width_simple(pdata
, aux2
);
647 pdata
->outline_title
=
648 prepare_outline_title(pkwtext
, L
": ", pwords
);
654 * Auxiliary text consisting of a bullet. (FIXME:
655 * configurable bullet.)
657 aux
= fake_word(L
"\x2022");
658 aux_indent
= indent
+ conf
->indent_list_bullet
;
661 case para_NumberedList
:
663 * Auxiliary text consisting of the number followed
664 * by a (FIXME: configurable) full stop.
667 aux2
= fake_word(L
".");
668 aux_indent
= indent
+ conf
->indent_list_bullet
;
671 case para_BiblioCited
:
673 * Auxiliary text consisting of the bibliography
674 * reference text, and a trailing space.
677 aux2
= fake_word(L
" ");
679 firstline_indent
+= paper_width_simple(pdata
, aux
);
680 firstline_indent
+= paper_width_simple(pdata
, aux2
);
684 if (pdata
->outline_level
>= 0 && !pdata
->outline_title
) {
685 pdata
->outline_title
=
686 prepare_outline_title(NULL
, NULL
, pwords
);
689 wrap_paragraph(pdata
, pwords
, conf
->base_width
- rmargin
,
690 indent
+ firstline_indent
,
691 indent
+ extra_indent
);
693 pdata
->first
->aux_text
= aux
;
694 pdata
->first
->aux_text_2
= aux2
;
695 pdata
->first
->aux_left_indent
= aux_indent
;
698 * Line breaking penalties.
705 case para_UnnumberedChapter
:
707 * Fixed and large penalty for breaking straight
708 * after a heading; corresponding bonus for
709 * breaking straight before.
711 pdata
->first
->penalty_before
= -500000;
712 pdata
->last
->penalty_after
= 500000;
713 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
)
714 ldata
->penalty_after
= 500000;
717 case para_DescribedThing
:
719 * This is treated a bit like a small heading:
720 * there's a penalty for breaking after it (i.e.
721 * between it and its description), and a bonus for
722 * breaking before it (actually _between_ list
725 pdata
->first
->penalty_before
= -200000;
726 pdata
->last
->penalty_after
= 200000;
731 * Most paragraph types: widow/orphan control by
732 * discouraging breaking one line from the end of
735 if (pdata
->first
!= pdata
->last
) {
736 pdata
->first
->penalty_after
= 100000;
737 pdata
->last
->penalty_before
= 100000;
742 standard_line_spacing(pdata
, conf
);
745 * Some kinds of section heading require a page break before
746 * them and an underline after.
748 if (ptype
== para_Title
||
749 ptype
== para_Chapter
||
750 ptype
== para_Appendix
||
751 ptype
== para_UnnumberedChapter
) {
752 pdata
->first
->page_break
= TRUE
;
753 pdata
->first
->space_before
= conf
->chapter_top_space
;
754 pdata
->last
->space_after
+=
755 (conf
->chapter_underline_depth
+
756 conf
->chapter_underline_thickness
);
757 pdata
->rect_type
= RECT_CHAPTER_UNDERLINE
;
763 static void standard_line_spacing(para_data
*pdata
, paper_conf
*conf
)
768 * Set the line spacing for each line in this paragraph.
770 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
771 if (ldata
== pdata
->first
)
772 ldata
->space_before
= conf
->base_para_spacing
/ 2;
774 ldata
->space_before
= conf
->base_leading
/ 2;
775 if (ldata
== pdata
->last
)
776 ldata
->space_after
= conf
->base_para_spacing
/ 2;
778 ldata
->space_after
= conf
->base_leading
/ 2;
779 ldata
->page_break
= FALSE
;
783 static font_encoding
*new_font_encoding(font_data
*font
)
788 fe
= mknew(font_encoding
);
791 if (font
->list
->tail
)
792 font
->list
->tail
->next
= fe
;
794 font
->list
->head
= fe
;
795 font
->list
->tail
= fe
;
800 for (i
= 0; i
< 256; i
++) {
801 fe
->vector
[i
] = NULL
;
803 fe
->to_unicode
[i
] = 0xFFFF;
809 static font_data
*make_std_font(font_list
*fontlist
, char const *name
)
817 widths
= ps_std_font_widths(name
);
821 for (nglyphs
= 0; ps_std_glyphs
[nglyphs
] != NULL
; nglyphs
++);
823 f
= mknew(font_data
);
827 f
->nglyphs
= nglyphs
;
828 f
->glyphs
= ps_std_glyphs
;
830 f
->subfont_map
= mknewa(subfont_map_entry
, nglyphs
);
833 * Our first subfont will contain all of US-ASCII. This isn't
834 * really necessary - we could just create custom subfonts
835 * precisely as the whim of render_string dictated - but
836 * instinct suggests that it might be nice to have the text in
837 * the output files look _marginally_ recognisable.
839 fe
= new_font_encoding(f
);
840 fe
->free_pos
= 0xA1; /* only the top half is free */
841 f
->latest_subfont
= fe
;
843 for (i
= 0; i
< (int)lenof(f
->bmp
); i
++)
846 for (i
= 0; i
< nglyphs
; i
++) {
848 ucs
= ps_glyph_to_unicode(f
->glyphs
[i
]);
849 assert(ucs
!= 0xFFFF);
851 if (ucs
>= 0x20 && ucs
<= 0x7E) {
852 fe
->vector
[ucs
] = f
->glyphs
[i
];
853 fe
->indices
[ucs
] = i
;
854 fe
->to_unicode
[ucs
] = ucs
;
855 f
->subfont_map
[i
].subfont
= fe
;
856 f
->subfont_map
[i
].position
= ucs
;
859 * This character is not yet assigned to a subfont.
861 f
->subfont_map
[i
].subfont
= NULL
;
862 f
->subfont_map
[i
].position
= 0;
869 static int string_width(font_data
*font
, wchar_t const *string
, int *errs
)
876 for (; *string
; string
++) {
879 index
= font
->bmp
[(unsigned short)*string
];
880 if (index
== 0xFFFF) {
884 width
+= font
->widths
[index
];
891 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
);
893 struct paper_width_ctx
{
898 static int paper_width_list(void *vctx
, word
*text
, word
*end
, int *nspaces
) {
900 while (text
&& text
!= end
) {
901 w
+= paper_width_internal(vctx
, text
, nspaces
);
907 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
)
909 struct paper_width_ctx
*ctx
= (struct paper_width_ctx
*)vctx
;
910 int style
, type
, findex
, width
, errs
;
913 switch (word
->type
) {
923 style
= towordstyle(word
->type
);
924 type
= removeattr(word
->type
);
926 findex
= (style
== word_Normal ? FONT_NORMAL
:
927 style
== word_Emph ? FONT_EMPH
:
930 if (type
== word_Normal
) {
932 } else if (type
== word_WhiteSpace
) {
933 if (findex
!= FONT_CODE
) {
936 return ctx
->minspacewidth
;
939 } else /* if (type == word_Quote) */ {
940 if (word
->aux
== quote_Open
)
941 str
= L
"\x2018"; /* FIXME: configurability! */
943 str
= L
"\x2019"; /* FIXME: configurability! */
946 width
= string_width(ctx
->pdata
->fonts
[findex
], str
, &errs
);
948 if (errs
&& word
->alt
)
949 return paper_width_list(vctx
, word
->alt
, NULL
, nspaces
);
951 return ctx
->pdata
->sizes
[findex
] * width
;
954 static int paper_width(void *vctx
, word
*word
)
956 return paper_width_internal(vctx
, word
, NULL
);
959 static int paper_width_simple(para_data
*pdata
, word
*text
)
961 struct paper_width_ctx ctx
;
965 (pdata
->sizes
[FONT_NORMAL
] *
966 string_width(pdata
->fonts
[FONT_NORMAL
], L
" ", NULL
));
968 return paper_width_list(&ctx
, text
, NULL
, NULL
);
971 static void wrap_paragraph(para_data
*pdata
, word
*words
,
972 int w
, int i1
, int i2
)
974 wrappedline
*wrapping
, *p
;
976 struct paper_width_ctx ctx
;
980 * We're going to need to store the line height in every line
981 * structure we generate.
986 for (i
= 0; i
< NFONTS
; i
++)
987 if (line_height
< pdata
->sizes
[i
])
988 line_height
= pdata
->sizes
[i
];
992 spacewidth
= (pdata
->sizes
[FONT_NORMAL
] *
993 string_width(pdata
->fonts
[FONT_NORMAL
], L
" ", NULL
));
994 if (spacewidth
== 0) {
996 * A font without a space?! Disturbing. I hope this never
997 * comes up, but I'll make a random guess anyway and set my
998 * space width to half the point size.
1000 spacewidth
= pdata
->sizes
[FONT_NORMAL
] * 4096 / 2;
1004 * I'm going to set the _minimum_ space width to 3/5 of the
1005 * standard one, and use the standard one as the optimum.
1007 ctx
.minspacewidth
= spacewidth
* 3 / 5;
1010 wrapping
= wrap_para(words
, w
- i1
, w
- i2
, paper_width
, &ctx
, spacewidth
);
1013 * Having done the wrapping, we now concoct a set of line_data
1016 pdata
->first
= pdata
->last
= NULL
;
1018 for (p
= wrapping
; p
; p
= p
->next
) {
1021 int len
, wid
, spaces
;
1023 ldata
= mknew(line_data
);
1025 ldata
->pdata
= pdata
;
1026 ldata
->first
= p
->begin
;
1027 ldata
->end
= p
->end
;
1028 ldata
->line_height
= line_height
;
1030 ldata
->xpos
= (p
== wrapping ? i1
: i2
);
1033 pdata
->last
->next
= ldata
;
1034 ldata
->prev
= pdata
->last
;
1036 pdata
->first
= ldata
;
1040 pdata
->last
= ldata
;
1043 len
= paper_width_list(&ctx
, ldata
->first
, ldata
->end
, &spaces
);
1044 wid
= (p
== wrapping ? w
- i1
: w
- i2
);
1047 ldata
->hshortfall
= wid
- len
;
1048 ldata
->nspaces
= spaces
;
1050 * This tells us how much the space width needs to
1051 * change from _min_spacewidth. But we want to store
1052 * its difference from the _natural_ space width, to
1053 * make the text rendering easier.
1055 ldata
->hshortfall
+= ctx
.minspacewidth
* spaces
;
1056 ldata
->hshortfall
-= spacewidth
* spaces
;
1058 * Special case: on the last line of a paragraph, we
1059 * never stretch spaces.
1061 if (ldata
->hshortfall
> 0 && !p
->next
)
1062 ldata
->hshortfall
= 0;
1064 ldata
->aux_text
= NULL
;
1065 ldata
->aux_text_2
= NULL
;
1066 ldata
->aux_left_indent
= 0;
1067 ldata
->penalty_before
= ldata
->penalty_after
= 0;
1072 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
1079 * Page breaking is done by a close analogue of the optimal
1080 * paragraph wrapping algorithm used by wrap_para(). We work
1081 * backwards from the end of the document line by line; for
1082 * each line, we contemplate every possible number of lines we
1083 * could put on a page starting with that line, determine a
1084 * cost function for each one, add it to the pre-computed cost
1085 * function for optimally page-breaking everything after that
1086 * page, and pick the best option.
1088 * Since my line_data structures are only used for this
1089 * purpose, I might as well just store the algorithm data
1093 for (l
= last
; l
; l
= l
->prev
) {
1094 int minheight
, text
= 0, space
= 0;
1098 for (m
= l
; m
; m
= m
->next
) {
1099 if (m
!= l
&& m
->page_break
)
1100 break; /* we've gone as far as we can */
1103 space
+= m
->prev
->space_after
;
1104 if (m
!= l
|| m
->page_break
)
1105 space
+= m
->space_before
;
1106 text
+= m
->line_height
;
1107 minheight
= text
+ space
;
1109 if (m
!= l
&& minheight
> page_height
)
1113 * Compute the cost of this arrangement, as the square
1114 * of the amount of wasted space on the page.
1115 * Exception: if this is the last page before a
1116 * mandatory break or the document end, we don't
1117 * penalise a large blank area.
1119 if (m
->next
&& !m
->next
->page_break
)
1121 int x
= page_height
- minheight
;
1128 cost
+= (x
* xf
) >> 8;
1132 if (m
->next
&& !m
->next
->page_break
) {
1133 cost
+= m
->penalty_after
;
1134 cost
+= m
->next
->penalty_before
;
1137 if (m
->next
&& !m
->next
->page_break
)
1138 cost
+= m
->next
->bestcost
;
1139 if (l
->bestcost
== -1 || l
->bestcost
> cost
) {
1141 * This is the best option yet for this starting
1145 if (m
->next
&& !m
->next
->page_break
)
1146 l
->vshortfall
= page_height
- minheight
;
1157 * Now go through the line list forwards and assemble the
1167 page
= mknew(page_data
);
1176 page
->first_line
= l
;
1177 page
->last_line
= l
->page_last
;
1179 page
->first_text
= page
->last_text
= NULL
;
1180 page
->first_xref
= page
->last_xref
= NULL
;
1181 page
->first_rect
= page
->last_rect
= NULL
;
1184 * Now assign a y-coordinate to each line on the page.
1187 for (l
= page
->first_line
; l
; l
= l
->next
) {
1188 if (l
!= page
->first_line
)
1189 space
+= l
->prev
->space_after
;
1190 if (l
!= page
->first_line
|| l
->page_break
)
1191 space
+= l
->space_before
;
1192 text
+= l
->line_height
;
1195 l
->ypos
= text
+ space
+
1196 space
* (float)page
->first_line
->vshortfall
/
1197 page
->first_line
->space
;
1199 if (l
== page
->last_line
)
1203 l
= page
->last_line
->next
;
1209 static void add_rect_to_page(page_data
*page
, int x
, int y
, int w
, int h
)
1211 rect
*r
= mknew(rect
);
1214 if (page
->last_rect
)
1215 page
->last_rect
->next
= r
;
1217 page
->first_rect
= r
;
1218 page
->last_rect
= r
;
1226 static void add_string_to_page(page_data
*page
, int x
, int y
,
1227 font_encoding
*fe
, int size
, char *text
)
1229 text_fragment
*frag
;
1231 frag
= mknew(text_fragment
);
1234 if (page
->last_text
)
1235 page
->last_text
->next
= frag
;
1237 page
->first_text
= frag
;
1238 page
->last_text
= frag
;
1243 frag
->fontsize
= size
;
1244 frag
->text
= dupstr(text
);
1248 * Returns the updated x coordinate.
1250 static int render_string(page_data
*page
, font_data
*font
, int fontsize
,
1251 int x
, int y
, wchar_t *str
)
1254 int textpos
, textwid
, glyph
;
1255 font_encoding
*subfont
= NULL
, *sf
;
1257 text
= mknewa(char, 1 + ustrlen(str
));
1258 textpos
= textwid
= 0;
1261 glyph
= font
->bmp
[*str
];
1263 if (glyph
== 0xFFFF)
1264 continue; /* nothing more we can do here */
1267 * Find which subfont this character is going in.
1269 sf
= font
->subfont_map
[glyph
].subfont
;
1275 * This character is not yet in a subfont. Assign one.
1277 if (font
->latest_subfont
->free_pos
>= 0x100)
1278 font
->latest_subfont
= new_font_encoding(font
);
1280 c
= font
->latest_subfont
->free_pos
++;
1281 if (font
->latest_subfont
->free_pos
== 0x7F)
1282 font
->latest_subfont
->free_pos
= 0xA1;
1284 font
->subfont_map
[glyph
].subfont
= font
->latest_subfont
;
1285 font
->subfont_map
[glyph
].position
= c
;
1286 font
->latest_subfont
->vector
[c
] = font
->glyphs
[glyph
];
1287 font
->latest_subfont
->indices
[c
] = glyph
;
1288 font
->latest_subfont
->to_unicode
[c
] = *str
;
1290 sf
= font
->latest_subfont
;
1293 if (!subfont
|| sf
!= subfont
) {
1295 text
[textpos
] = '\0';
1296 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
1299 assert(textpos
== 0);
1305 text
[textpos
++] = font
->subfont_map
[glyph
].position
;
1306 textwid
+= font
->widths
[glyph
] * fontsize
;
1312 text
[textpos
] = '\0';
1313 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
1321 * Returns the updated x coordinate.
1323 static int render_text(page_data
*page
, para_data
*pdata
, line_data
*ldata
,
1324 int x
, int y
, word
*text
, word
*text_end
, xref
**xr
,
1325 int shortfall
, int nspaces
, int *nspace
,
1326 keywordlist
*keywords
)
1328 while (text
&& text
!= text_end
) {
1329 int style
, type
, findex
, errs
;
1333 switch (text
->type
) {
1335 * Start a cross-reference.
1337 case word_HyperLink
:
1338 case word_UpperXref
:
1339 case word_LowerXref
:
1341 if (text
->type
== word_HyperLink
) {
1343 dest
.url
= utoa_dup(text
->text
);
1346 keyword
*kwl
= kw_lookup(keywords
, text
->text
);
1350 assert(kwl
->para
->private_data
);
1351 pdata
= (para_data
*) kwl
->para
->private_data
;
1353 dest
.page
= pdata
->first
->page
;
1357 * Shouldn't happen, but *shrug*
1364 if (dest
.type
!= NONE
) {
1366 (*xr
)->dest
= dest
; /* structure copy */
1367 if (page
->last_xref
)
1368 page
->last_xref
->next
= *xr
;
1370 page
->first_xref
= *xr
;
1371 page
->last_xref
= *xr
;
1375 * FIXME: Ideally we should have, and use, some
1376 * vertical font metric information here so that
1377 * our cross-ref rectangle can take account of
1378 * descenders and the font's cap height. This will
1379 * do for the moment, but it isn't ideal.
1381 (*xr
)->lx
= (*xr
)->rx
= x
;
1383 (*xr
)->ty
= y
+ ldata
->line_height
;
1388 * Finish extending a cross-reference box.
1398 * FIXME: we should do something with this.
1402 style
= towordstyle(text
->type
);
1403 type
= removeattr(text
->type
);
1405 findex
= (style
== word_Normal ? FONT_NORMAL
:
1406 style
== word_Emph ? FONT_EMPH
:
1409 if (type
== word_Normal
) {
1411 } else if (type
== word_WhiteSpace
) {
1412 x
+= pdata
->sizes
[findex
] *
1413 string_width(pdata
->fonts
[findex
], L
" ", NULL
);
1414 if (nspaces
&& findex
!= FONT_CODE
) {
1415 x
+= (*nspace
+1) * shortfall
/ nspaces
;
1416 x
-= *nspace
* shortfall
/ nspaces
;
1420 } else /* if (type == word_Quote) */ {
1421 if (text
->aux
== quote_Open
)
1422 str
= L
"\x2018"; /* FIXME: configurability! */
1424 str
= L
"\x2019"; /* FIXME: configurability! */
1427 (void) string_width(pdata
->fonts
[findex
], str
, &errs
);
1429 if (errs
&& text
->alt
)
1430 x
= render_text(page
, pdata
, ldata
, x
, y
, text
->alt
, NULL
,
1431 xr
, shortfall
, nspaces
, nspace
, keywords
);
1433 x
= render_string(page
, pdata
->fonts
[findex
],
1434 pdata
->sizes
[findex
], x
, y
, str
);
1447 * Returns the last x position used on the line.
/*
 * Draw one laid-out line on the page recorded in ldata->page.
 *
 *  - ldata:    the line to draw. Its page, pdata, xpos, ypos,
 *              line_height, first/end word range, hshortfall, nspaces
 *              and the aux_text fields are read here.
 *  - left_x:   added to every x offset taken from ldata.
 *  - top_y:    ldata->ypos is subtracted from this to get the y
 *              coordinate handed to render_text().
 *  - dest:     cross-reference state on entry; when dest->type is not
 *              NONE a cross-reference record is filled in and linked
 *              on to the xref list of the page before the main text
 *              is drawn.
 *  - keywords: handed to render_text() unchanged for the aux text.
 *
 * The result of the render_text() call over the main text is stored in
 * ret. NOTE(review): presumably ret is the return value; the return
 * statement is not among the lines visible here -- confirm.
 */
1449 static int render_line(line_data
*ldata
, int left_x
, int top_y
,
1450 xref_dest
*dest
, keywordlist
*keywords
)
/*
 * Optional auxiliary text: drawn from left_x + aux_left_indent, with
 * zero shortfall and zero spaces passed to render_text(). The x value
 * returned for aux_text is where aux_text_2 starts, if there is one.
 */
1456 if (ldata
->aux_text
) {
1460 x
= render_text(ldata
->page
, ldata
->pdata
, ldata
,
1461 left_x
+ ldata
->aux_left_indent
,
1462 top_y
- ldata
->ypos
,
1463 ldata
->aux_text
, NULL
, &xr
, 0, 0, &nspace
, keywords
);
1464 if (ldata
->aux_text_2
)
1465 render_text(ldata
->page
, ldata
->pdata
, ldata
,
1466 x
, top_y
- ldata
->ypos
,
1467 ldata
->aux_text_2
, NULL
, &xr
, 0, 0, &nspace
, keywords
);
1473 * There might be a cross-reference carried over from a
1476 if (dest
->type
!= NONE
) {
/*
 * NOTE(review): xr is dereferenced below, so it has to point at a
 * valid xref record by now; the statement that sets it is not visible
 * here -- confirm.
 */
1479 xr
->dest
= *dest
; /* structure copy */
/*
 * Link xr on to the xref list of the page: after last_xref when that
 * is set, otherwise (presumably an else arm -- confirm) as first_xref.
 * Either way xr becomes the new last_xref.
 */
1480 if (ldata
->page
->last_xref
)
1481 ldata
->page
->last_xref
->next
= xr
;
1483 ldata
->page
->first_xref
= xr
;
1484 ldata
->page
->last_xref
= xr
;
/*
 * The record starts with lx and rx both at the left edge of the line
 * text, by at the line y coordinate and ty one line_height above it.
 */
1485 xr
->lx
= xr
->rx
= left_x
+ ldata
->xpos
;
1486 xr
->by
= top_y
- ldata
->ypos
;
1487 xr
->ty
= top_y
- ldata
->ypos
+ ldata
->line_height
;
/*
 * Main text of the line: words from ldata->first up to ldata->end,
 * with the shortfall and space count recorded for this line.
 */
1491 ret
= render_text(ldata
->page
, ldata
->pdata
, ldata
,
1492 left_x
+ ldata
->xpos
,
1493 top_y
- ldata
->ypos
, ldata
->first
, ldata
->end
, &xr
,
1494 ldata
->hshortfall
, ldata
->nspaces
, &nspace
,
1499 * There's a cross-reference continued on to the next line.
/*
 * Build the para_data for a code paragraph.
 *
 *  - indent: stored as the xpos of every line created here.
 *  - words:  list of source words, walked through ->next. When a word
 *            is followed by one of type word_Emph, the text of that
 *            following word is read character by character as
 *            emphasis markers ('i' and 'b' are tested for) and the
 *            list position is advanced past it.
 *  - conf:   supplies the code fonts (cr, co, cb) and base_font_size.
 *
 * Each run of text with the same emphasis becomes one word whose type
 * is word_WeakCode (prev == 0), word_Emph (prev == 1) or word_Normal
 * (anything else). One line_data is created per iteration of the outer
 * loop and linked on to pdata; every line gets a penalty of 50000 on
 * both sides to discourage breaking inside the paragraph.
 * standard_line_spacing() is applied to the finished paragraph.
 */
1509 static para_data
*code_paragraph(int indent
, word
*words
, paper_conf
*conf
)
1511 para_data
*pdata
= mknew(para_data
);
1514 * For code paragraphs, I'm going to hack grievously and
1515 * pretend the three normal fonts are the three code paragraph
1518 pdata
->fonts
[FONT_NORMAL
] = conf
->cb
;
1519 pdata
->fonts
[FONT_EMPH
] = conf
->co
;
/*
 * FONT_CODE takes the regular code font. Unemphasised runs are given
 * type word_WeakCode below, which is neither word_Normal nor
 * word_Emph, so they must not share conf->cb with FONT_NORMAL:
 * otherwise plain and word_Normal runs would be drawn identically and
 * conf->cr would never be used.
 */
1520 pdata
->fonts
[FONT_CODE
] = conf
->cr
;
1521 pdata
->sizes
[FONT_NORMAL
] =
1522 pdata
->sizes
[FONT_EMPH
] =
1523 pdata
->sizes
[FONT_CODE
] = 12;
1525 pdata
->first
= pdata
->last
= NULL
;
1526 pdata
->outline_level
= -1;
1527 pdata
->rect_type
= RECT_NONE
;
1528 pdata
->contents_entry
= NULL
;
1530 for (; words
; words
= words
->next
) {
1531 wchar_t *t
, *e
, *start
;
1532 word
*lhead
= NULL
, *ltail
= NULL
, *w
;
1534 int prev
= -1, curr
;
1537 if (words
->next
&& words
->next
->type
== word_Emph
) {
1538 e
= words
->next
->text
;
1539 words
= words
->next
;
1549 else if (*e
== L
'i')
1551 else if (*e
== L
'b')
1568 * We've isolated a maximal subsequence of the line
1569 * which has the same emphasis. Form it into a word
1575 w
->type
= (prev
== 0 ? word_WeakCode
:
1576 prev
== 1 ? word_Emph
: word_Normal
);
/* Copy the run [start, t) into a freshly allocated, terminated string. */
1577 w
->text
= mknewa(wchar_t, t
-start
+1);
1578 memcpy(w
->text
, start
, (t
-start
) * sizeof(wchar_t));
1579 w
->text
[t
-start
] = '\0';
1592 ldata
= mknew(line_data
);
1594 ldata
->pdata
= pdata
;
1595 ldata
->first
= lhead
;
1597 ldata
->line_height
= conf
->base_font_size
* 4096;
1599 ldata
->xpos
= indent
;
1602 pdata
->last
->next
= ldata
;
1603 ldata
->prev
= pdata
->last
;
1605 pdata
->first
= ldata
;
1609 pdata
->last
= ldata
;
1611 ldata
->hshortfall
= 0;
1613 ldata
->aux_text
= NULL
;
1614 ldata
->aux_text_2
= NULL
;
1615 ldata
->aux_left_indent
= 0;
1616 /* General opprobrium for breaking in a code paragraph. */
1617 ldata
->penalty_before
= ldata
->penalty_after
= 50000;
1620 standard_line_spacing(pdata
, conf
);
/*
 * Build the para_data for a horizontal rule.
 *
 *  - indent: stored as the xpos of the single line created here.
 *  - conf:   supplies rule_thickness, used as the line height, and is
 *            passed on to standard_line_spacing().
 *
 * The paragraph consists of exactly one line_data with no words and no
 * auxiliary text; it is marked RECT_RULE, has outline_level -1 and no
 * contents entry.
 */
1625 static para_data
*rule_paragraph(int indent
, paper_conf
*conf
)
1627 para_data
*pdata
= mknew(para_data
);
1630 ldata
= mknew(line_data
);
1632 ldata
->pdata
= pdata
;
1633 ldata
->first
= NULL
;
1635 ldata
->line_height
= conf
->rule_thickness
;
1637 ldata
->xpos
= indent
;
1642 ldata
->hshortfall
= 0;
1644 ldata
->aux_text
= NULL
;
1645 ldata
->aux_text_2
= NULL
;
1646 ldata
->aux_left_indent
= 0;
1649 * Better to break after a rule than before it
/*
 * NOTE(review): both penalties are adjusted with += on a line_data
 * that was only just allocated; no earlier assignment to either field
 * is visible here, so confirm they are initialised before this point.
 * NOTE(review): code_paragraph() uses a positive penalty to discourage
 * a break. If the same convention holds here, +100000 on penalty_after
 * and -100000 on penalty_before favour a break BEFORE the rule, which
 * is the opposite of the stated intent -- confirm the signs.
 */
1651 ldata
->penalty_after
+= 100000;
1652 ldata
->penalty_before
+= -100000;
1654 pdata
->first
= pdata
->last
= ldata
;
1655 pdata
->outline_level
= -1;
1656 pdata
->rect_type
= RECT_RULE
;
1657 pdata
->contents_entry
= NULL
;
1659 standard_line_spacing(pdata
, conf
);
1665 * Plain-text-like formatting for outline titles.
/*
 * Append a plain-text rendering of a word list to the string rs.
 *
 *  - rs:   string being built; written to with rdadd() and rdadds().
 *  - text: first word of the list, walked through ->next; a NULL list
 *          adds nothing.
 *
 * For the text-bearing word types handled by the large case group:
 *  - an opening marker is added when the aux attribute of the word is
 *    attr_First or attr_Only: an underscore for emphasis style, a
 *    single quote character for code style;
 *  - then the word itself: text->text for word_Normal, a single quote
 *    character for word_Quote;
 *  - a matching closing marker is added when the aux attribute is
 *    attr_Last or attr_Only.
 * The assert rejects word_CodeQuote and word_WkCodeQuote even though
 * they appear among the case labels.
 */
1667 static void paper_rdaddw(rdstring
*rs
, word
*text
) {
1668 for (; text
; text
= text
->next
) switch (text
->type
) {
1669 case word_HyperLink
:
1671 case word_UpperXref
:
1672 case word_LowerXref
:
1681 case word_WhiteSpace
:
1682 case word_EmphSpace
:
1683 case word_CodeSpace
:
1684 case word_WkCodeSpace
:
1686 case word_EmphQuote
:
1687 case word_CodeQuote
:
1688 case word_WkCodeQuote
:
1689 assert(text
->type
!= word_CodeQuote
&&
1690 text
->type
!= word_WkCodeQuote
);
/* Opening marker, only at the start of a styled run. */
1691 if (towordstyle(text
->type
) == word_Emph
&&
1692 (attraux(text
->aux
) == attr_First
||
1693 attraux(text
->aux
) == attr_Only
))
1694 rdadd(rs
, L
'_'); /* FIXME: configurability */
1695 else if (towordstyle(text
->type
) == word_Code
&&
1696 (attraux(text
->aux
) == attr_First
||
1697 attraux(text
->aux
) == attr_Only
))
1698 rdadd(rs
, L
'\''); /* FIXME: configurability */
/* The word itself, chosen by its type with style attributes removed. */
1699 if (removeattr(text
->type
) == word_Normal
) {
1700 rdadds(rs
, text
->text
);
1701 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
1703 } else if (removeattr(text
->type
) == word_Quote
) {
1704 rdadd(rs
, L
'\''); /* FIXME: configurability */
/* Closing marker, only at the end of a styled run. */
1706 if (towordstyle(text
->type
) == word_Emph
&&
1707 (attraux(text
->aux
) == attr_Last
||
1708 attraux(text
->aux
) == attr_Only
))
1709 rdadd(rs
, L
'_'); /* FIXME: configurability */
1710 else if (towordstyle(text
->type
) == word_Code
&&
1711 (attraux(text
->aux
) == attr_Last
||
1712 attraux(text
->aux
) == attr_Only
))
1713 rdadd(rs
, L
'\''); /* FIXME: configurability */
/*
 * Build a plain-text outline title in a fresh rdstring: the rendering
 * of the word list first, then the separator string, then the
 * rendering of a second word list, in that order. Word lists are
 * rendered with paper_rdaddw(), the separator is appended with
 * rdadds().
 *
 * NOTE(review): presumably each of the three pieces is optional and
 * the accumulated text is returned to the caller; the guarding
 * conditions and the return statement are not visible here -- confirm.
 */
1718 static wchar_t *prepare_outline_title(word
*first
, wchar_t *separator
,
1721 rdstring rs
= {0, 0, NULL
};
1724 paper_rdaddw(&rs
, first
);
1726 rdadds(&rs
, separator
);
1728 paper_rdaddw(&rs
, second
);
/*
 * Allocate a new word of type word_Normal whose text is a duplicate
 * (made with ustrdup) of the given string, with breaks set to FALSE.
 *
 * NOTE(review): presumably the remaining fields are also initialised
 * and the new word is returned; those statements are not visible
 * here -- confirm.
 */
1733 static word
*fake_word(wchar_t *text
)
1735 word
*ret
= mknew(word
);
1738 ret
->type
= word_Normal
;
1739 ret
->text
= ustrdup(text
);
1740 ret
->breaks
= FALSE
;
1745 static word
*prepare_contents_title(word
*first
, wchar_t *separator
,
1754 w
= dup_word_list(first
);
1762 w
= fake_word(separator
);
1768 *wptr
= dup_word_list(second
);