2 * Paper printing pre-backend for Halibut.
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
15 * - set up contents section now we know what sections begin on
20 * - all the missing features in text rendering (code paragraphs,
21 * list bullets, indentation, section heading styles)
25 * That should bring us to the same level of functionality that
26 * original-Halibut had, and the same in PDF plus the obvious
27 * interactive navigation features. After that, in future work:
29 * - linearised PDF, perhaps?
31 * - I'm uncertain of whether I need to include a ToUnicode CMap
32 * in each of my font definitions in PDF. Currently things (by
33 * which I mean cut and paste out of acroread) seem to be
34 * working fairly happily without it, but I don't know.
47 static font_data
*make_std_font(font_list
*fontlist
, char const *name
);
48 static void wrap_paragraph(para_data
*pdata
, word
*words
,
49 int w
, int i1
, int i2
);
50 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
52 static void render_line(line_data
*ldata
, int left_x
, int top_y
,
53 xref_dest
*dest
, keywordlist
*keywords
);
55 void *paper_pre_backend(paragraph
*sourceform
, keywordlist
*keywords
,
59 int indent
, extra_indent
, firstline_indent
;
61 line_data
*ldata
, *firstline
, *lastline
;
62 font_data
*tr
, *ti
, *cr
;
67 * FIXME: All these things ought to become configurable.
69 int paper_width
= 595 * 4096;
70 int paper_height
= 841 * 4096;
71 int left_margin
= 72 * 4096;
72 int top_margin
= 72 * 4096;
73 int right_margin
= 72 * 4096;
74 int bottom_margin
= 108 * 4096;
75 int indent_list_bullet
= 6 * 4096;
76 int indent_list
= 24 * 4096;
77 int indent_quote
= 18 * 4096;
78 int base_leading
= 4096;
79 int base_para_spacing
= 10 * 4096;
81 int base_width
= paper_width
- left_margin
- right_margin
;
82 int page_height
= paper_height
- top_margin
- bottom_margin
;
84 IGNORE(keywords
); /* FIXME */
85 IGNORE(idx
); /* FIXME */
86 IGNORE(indent_list_bullet
); /* FIXME */
89 * First, set up some font structures.
91 fontlist
= mknew(font_list
);
92 fontlist
->head
= fontlist
->tail
= NULL
;
93 tr
= make_std_font(fontlist
, "Times-Roman");
94 ti
= make_std_font(fontlist
, "Times-Italic");
95 cr
= make_std_font(fontlist
, "Courier");
98 * Go through and break up each paragraph into lines.
101 firstline
= lastline
= NULL
;
102 for (p
= sourceform
; p
; p
= p
->next
) {
103 p
->private_data
= NULL
;
107 * These paragraph types are either invisible or don't
108 * define text in the normal sense. Either way, they
109 * don't require wrapping.
115 case para_NotParaType
:
122 * These paragraph types don't require wrapping, but
123 * they do affect the line width to which we wrap the
124 * rest of the paragraphs, so we need to pay attention.
127 indent
+= indent_list
; break;
129 indent
-= indent_list
; assert(indent
>= 0); break;
131 indent
+= indent_quote
; break;
133 indent
-= indent_quote
; assert(indent
>= 0); break;
136 * This paragraph type is special. Process it
144 * All of these paragraph types require wrapping in the
145 * ordinary way. So we must supply a set of fonts, a
146 * line width and auxiliary information (e.g. bullet
147 * text) for each one.
151 case para_UnnumberedChapter
:
155 case para_BiblioCited
:
157 case para_NumberedList
:
158 case para_DescribedThing
:
159 case para_Description
:
162 pdata
= mknew(para_data
);
165 * FIXME: Subsidiary switch on paragraph type to decide
166 * what font set to use for this paragraph.
168 pdata
->fonts
[FONT_NORMAL
] = tr
;
169 pdata
->sizes
[FONT_NORMAL
] = 12;
170 pdata
->fonts
[FONT_EMPH
] = ti
;
171 pdata
->sizes
[FONT_EMPH
] = 12;
172 pdata
->fonts
[FONT_CODE
] = cr
;
173 pdata
->sizes
[FONT_CODE
] = 12;
176 * FIXME: Also select an indentation level depending on
177 * the paragraph type (list paragraphs other than
178 * para_DescribedThing need extra indent).
180 * Perhaps at some point we might even arrange for the
181 * user to be able to request indented first lines in
185 firstline_indent
= 0;
187 wrap_paragraph(pdata
, p
->words
, base_width
,
188 indent
+ firstline_indent
,
189 indent
+ extra_indent
);
192 * FIXME: Also find the auxiliary data for this
193 * paragraph. For para_Bullet it's a bullet; for
194 * para_NumberedList it's the number; for some section
195 * headings (depending on the style of section heading
196 * selected) it's the section number.
198 * Assign into pdata->first->aux_*.
201 p
->private_data
= pdata
;
204 * Set the line spacing for each line in this paragraph.
206 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
207 if (ldata
== pdata
->first
)
208 ldata
->space_before
= base_para_spacing
/ 2;
210 ldata
->space_before
= base_leading
/ 2;
211 if (ldata
== pdata
->last
)
212 ldata
->space_after
= base_para_spacing
/ 2;
214 ldata
->space_after
= base_leading
/ 2;
215 ldata
->page_break
= FALSE
;
219 * FIXME: some kinds of section heading do require a
220 * page break before them.
227 * Link all line structures together into a big list.
229 if (p
->private_data
) {
230 pdata
= (para_data
*)p
->private_data
;
233 lastline
->next
= pdata
->first
;
234 pdata
->first
->prev
= lastline
;
236 firstline
= pdata
->first
;
237 pdata
->first
->prev
= NULL
;
239 lastline
= pdata
->last
;
245 * Now we have an enormous linked list of every line of text in
246 * the document. Break it up into pages.
248 pages
= page_breaks(firstline
, lastline
, page_height
);
251 * Now we're ready to actually lay out the pages. We do this by
252 * looping over _paragraphs_, since we may need to track cross-
253 * references between lines and even across pages.
255 for (p
= sourceform
; p
; p
= p
->next
) {
256 pdata
= (para_data
*)p
->private_data
;
261 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
262 render_line(ldata
, left_margin
, paper_height
- top_margin
,
264 if (ldata
== pdata
->last
)
270 doc
= mknew(document
);
271 doc
->fonts
= fontlist
;
273 doc
->paper_width
= paper_width
;
274 doc
->paper_height
= paper_height
;
278 static font_encoding
*new_font_encoding(font_data
*font
)
283 fe
= mknew(font_encoding
);
286 if (font
->list
->tail
)
287 font
->list
->tail
->next
= fe
;
289 font
->list
->head
= fe
;
290 font
->list
->tail
= fe
;
295 for (i
= 0; i
< 256; i
++) {
296 fe
->vector
[i
] = NULL
;
298 fe
->to_unicode
[i
] = 0xFFFF;
304 static font_data
*make_std_font(font_list
*fontlist
, char const *name
)
312 widths
= ps_std_font_widths(name
);
316 for (nglyphs
= 0; ps_std_glyphs
[nglyphs
] != NULL
; nglyphs
++);
318 f
= mknew(font_data
);
322 f
->nglyphs
= nglyphs
;
323 f
->glyphs
= ps_std_glyphs
;
325 f
->subfont_map
= mknewa(subfont_map_entry
, nglyphs
);
328 * Our first subfont will contain all of US-ASCII. This isn't
329 * really necessary - we could just create custom subfonts
330 * precisely as the whim of render_string dictated - but
331 * instinct suggests that it might be nice to have the text in
332 * the output files look _marginally_ recognisable.
334 fe
= new_font_encoding(f
);
335 fe
->free_pos
= 0xA1; /* only the top half is free */
336 f
->latest_subfont
= fe
;
338 for (i
= 0; i
< (int)lenof(f
->bmp
); i
++)
341 for (i
= 0; i
< nglyphs
; i
++) {
343 ucs
= ps_glyph_to_unicode(f
->glyphs
[i
]);
344 assert(ucs
!= 0xFFFF);
346 if (ucs
>= 0x20 && ucs
<= 0x7E) {
347 fe
->vector
[ucs
] = f
->glyphs
[i
];
348 fe
->indices
[ucs
] = i
;
349 fe
->to_unicode
[ucs
] = ucs
;
350 f
->subfont_map
[i
].subfont
= fe
;
351 f
->subfont_map
[i
].position
= ucs
;
354 * This character is not yet assigned to a subfont.
356 f
->subfont_map
[i
].subfont
= NULL
;
357 f
->subfont_map
[i
].position
= 0;
364 static int string_width(font_data
*font
, wchar_t const *string
, int *errs
)
371 for (; *string
; string
++) {
374 index
= font
->bmp
[(unsigned short)*string
];
375 if (index
== 0xFFFF) {
379 width
+= font
->widths
[index
];
386 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
);
388 struct paper_width_ctx
{
393 static int paper_width_list(void *vctx
, word
*text
, word
*end
, int *nspaces
) {
395 while (text
&& text
!= end
) {
396 w
+= paper_width_internal(vctx
, text
, nspaces
);
402 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
)
404 struct paper_width_ctx
*ctx
= (struct paper_width_ctx
*)vctx
;
405 int style
, type
, findex
, width
, errs
;
408 switch (word
->type
) {
418 style
= towordstyle(word
->type
);
419 type
= removeattr(word
->type
);
421 findex
= (style
== word_Normal ? FONT_NORMAL
:
422 style
== word_Emph ? FONT_EMPH
:
425 if (type
== word_Normal
) {
427 } else if (type
== word_WhiteSpace
) {
428 if (findex
!= FONT_CODE
) {
431 return ctx
->minspacewidth
;
434 } else /* if (type == word_Quote) */ {
435 if (word
->aux
== quote_Open
)
436 str
= L
"\x2018"; /* FIXME: configurability! */
438 str
= L
"\x2019"; /* FIXME: configurability! */
441 width
= string_width(ctx
->pdata
->fonts
[findex
], str
, &errs
);
443 if (errs
&& word
->alt
)
444 return paper_width_list(vctx
, word
->alt
, NULL
, nspaces
);
446 return ctx
->pdata
->sizes
[findex
] * width
;
449 static int paper_width(void *vctx
, word
*word
)
451 return paper_width_internal(vctx
, word
, NULL
);
454 static void wrap_paragraph(para_data
*pdata
, word
*words
,
455 int w
, int i1
, int i2
)
457 wrappedline
*wrapping
, *p
;
459 struct paper_width_ctx ctx
;
463 * We're going to need to store the line height in every line
464 * structure we generate.
469 for (i
= 0; i
< NFONTS
; i
++)
470 if (line_height
< pdata
->sizes
[i
])
471 line_height
= pdata
->sizes
[i
];
475 spacewidth
= (pdata
->sizes
[FONT_NORMAL
] *
476 string_width(pdata
->fonts
[FONT_NORMAL
], L
" ", NULL
));
477 if (spacewidth
== 0) {
479 * A font without a space?! Disturbing. I hope this never
480 * comes up, but I'll make a random guess anyway and set my
481 * space width to half the point size.
483 spacewidth
= pdata
->sizes
[FONT_NORMAL
] * 4096 / 2;
487 * I'm going to set the _minimum_ space width to 3/5 of the
488 * standard one, and use the standard one as the optimum.
490 ctx
.minspacewidth
= spacewidth
* 3 / 5;
493 wrapping
= wrap_para(words
, w
- i1
, w
- i2
, paper_width
, &ctx
, spacewidth
);
496 * Having done the wrapping, we now concoct a set of line_data
499 pdata
->first
= pdata
->last
= NULL
;
501 for (p
= wrapping
; p
; p
= p
->next
) {
504 int len
, wid
, spaces
;
506 ldata
= mknew(line_data
);
508 ldata
->pdata
= pdata
;
509 ldata
->first
= p
->begin
;
511 ldata
->line_height
= line_height
;
513 ldata
->xpos
= (p
== wrapping ? i1
: i2
);
516 pdata
->last
->next
= ldata
;
517 ldata
->prev
= pdata
->last
;
519 pdata
->first
= ldata
;
526 len
= paper_width_list(&ctx
, ldata
->first
, ldata
->end
, &spaces
);
527 wid
= (p
== wrapping ? w
- i1
: w
- i2
);
530 ldata
->hshortfall
= wid
- len
;
531 ldata
->nspaces
= spaces
;
533 * This tells us how much the space width needs to
534 * change from _min_spacewidth. But we want to store
535 * its difference from the _natural_ space width, to
536 * make the text rendering easier.
538 ldata
->hshortfall
+= ctx
.minspacewidth
* spaces
;
539 ldata
->hshortfall
-= spacewidth
* spaces
;
541 * Special case: on the last line of a paragraph, we
542 * never stretch spaces.
544 if (ldata
->hshortfall
> 0 && !p
->next
)
545 ldata
->hshortfall
= 0;
547 ldata
->aux_text
= NULL
;
548 ldata
->aux_left_indent
= 0;
553 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
560 * Page breaking is done by a close analogue of the optimal
561 * paragraph wrapping algorithm used by wrap_para(). We work
562 * backwards from the end of the document line by line; for
563 * each line, we contemplate every possible number of lines we
564 * could put on a page starting with that line, determine a
565 * cost function for each one, add it to the pre-computed cost
566 * function for optimally page-breaking everything after that
567 * page, and pick the best option.
569 * Since my line_data structures are only used for this
570 * purpose, I might as well just store the algorithm data
574 for (l
= last
; l
; l
= l
->prev
) {
575 int minheight
, text
= 0, space
= 0;
579 for (m
= l
; m
; m
= m
->next
) {
580 if (m
!= l
&& m
->page_break
)
581 break; /* we've gone as far as we can */
584 space
+= m
->prev
->space_after
;
585 if (m
!= l
|| m
->page_break
)
586 space
+= m
->space_before
;
587 text
+= m
->line_height
;
588 minheight
= text
+ space
;
590 if (m
!= l
&& minheight
> page_height
)
594 * Compute the cost of this arrangement, as the square
595 * of the amount of wasted space on the page.
596 * Exception: if this is the last page before a
597 * mandatory break or the document end, we don't
598 * penalise a large blank area.
600 if (m
->next
&& !m
->next
->page_break
)
602 int x
= page_height
- minheight
;
609 cost
+= (x
* xf
) >> 8;
614 * FIXME: here I should introduce penalties for
615 * breaking in mid-paragraph, particularly very close
616 * to one end of a paragraph and particularly in code
620 if (m
->next
&& !m
->next
->page_break
)
621 cost
+= m
->next
->bestcost
;
623 if (l
->bestcost
== -1 || l
->bestcost
> cost
) {
625 * This is the best option yet for this starting
629 if (m
->next
&& !m
->next
->page_break
)
630 l
->vshortfall
= page_height
- minheight
;
641 * Now go through the line list forwards and assemble the
651 page
= mknew(page_data
);
660 page
->first_line
= l
;
661 page
->last_line
= l
->page_last
;
663 page
->first_text
= page
->last_text
= NULL
;
665 page
->first_xref
= page
->last_xref
= NULL
;
668 * Now assign a y-coordinate to each line on the page.
671 for (l
= page
->first_line
; l
; l
= l
->next
) {
672 if (l
!= page
->first_line
)
673 space
+= l
->prev
->space_after
;
674 if (l
!= page
->first_line
|| l
->page_break
)
675 space
+= l
->space_before
;
676 text
+= l
->line_height
;
679 l
->ypos
= text
+ space
+
680 space
* (float)page
->first_line
->vshortfall
/
681 page
->first_line
->space
;
683 if (l
== page
->last_line
)
687 l
= page
->last_line
->next
;
693 static void add_string_to_page(page_data
*page
, int x
, int y
,
694 font_encoding
*fe
, int size
, char *text
)
698 frag
= mknew(text_fragment
);
702 page
->last_text
->next
= frag
;
704 page
->first_text
= frag
;
705 page
->last_text
= frag
;
710 frag
->fontsize
= size
;
711 frag
->text
= dupstr(text
);
715 * Returns the updated x coordinate.
717 static int render_string(page_data
*page
, font_data
*font
, int fontsize
,
718 int x
, int y
, wchar_t *str
)
721 int textpos
, textwid
, glyph
;
722 font_encoding
*subfont
= NULL
, *sf
;
724 text
= mknewa(char, 1 + ustrlen(str
));
725 textpos
= textwid
= 0;
728 glyph
= font
->bmp
[*str
];
731 continue; /* nothing more we can do here */
734 * Find which subfont this character is going in.
736 sf
= font
->subfont_map
[glyph
].subfont
;
742 * This character is not yet in a subfont. Assign one.
744 if (font
->latest_subfont
->free_pos
>= 0x100)
745 font
->latest_subfont
= new_font_encoding(font
);
747 c
= font
->latest_subfont
->free_pos
++;
748 if (font
->latest_subfont
->free_pos
== 0x7F)
749 font
->latest_subfont
->free_pos
= 0xA1;
751 font
->subfont_map
[glyph
].subfont
= font
->latest_subfont
;
752 font
->subfont_map
[glyph
].position
= c
;
753 font
->latest_subfont
->vector
[c
] = font
->glyphs
[glyph
];
754 font
->latest_subfont
->indices
[c
] = glyph
;
755 font
->latest_subfont
->to_unicode
[c
] = *str
;
757 sf
= font
->latest_subfont
;
760 if (!subfont
|| sf
!= subfont
) {
762 text
[textpos
] = '\0';
763 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
766 assert(textpos
== 0);
772 text
[textpos
++] = font
->subfont_map
[glyph
].position
;
773 textwid
+= font
->widths
[glyph
] * fontsize
;
779 text
[textpos
] = '\0';
780 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
788 * Returns the updated x coordinate.
790 static int render_text(page_data
*page
, para_data
*pdata
, line_data
*ldata
,
791 int x
, int y
, word
*text
, word
*text_end
, xref
**xr
,
792 int shortfall
, int nspaces
, int *nspace
,
793 keywordlist
*keywords
)
795 while (text
&& text
!= text_end
) {
796 int style
, type
, findex
, errs
;
800 switch (text
->type
) {
802 * Start a cross-reference.
808 if (text
->type
== word_HyperLink
) {
810 dest
.url
= utoa_dup(text
->text
);
813 keyword
*kwl
= kw_lookup(keywords
, text
->text
);
817 assert(kwl
->para
->private_data
);
818 pdata
= (para_data
*) kwl
->para
->private_data
;
820 dest
.page
= pdata
->first
->page
;
824 * Shouldn't happen, but *shrug*
831 if (dest
.type
!= NONE
) {
833 (*xr
)->dest
= dest
; /* structure copy */
835 page
->last_xref
->next
= *xr
;
837 page
->first_xref
= *xr
;
838 page
->last_xref
= *xr
;
841 * FIXME: Ideally we should have, and use, some
842 * vertical font metric information here so that
843 * our cross-ref rectangle can take account of
844 * descenders and the font's cap height. This will
845 * do for the moment, but it isn't ideal.
847 (*xr
)->lx
= (*xr
)->rx
= x
;
849 (*xr
)->ty
= y
+ ldata
->line_height
;
854 * Finish extending a cross-reference box.
864 * FIXME: we should do something with all of these!
865 * Hyperlinks and xrefs have meaning in PDF, and this
866 * is probably the right place to nail down the index
871 style
= towordstyle(text
->type
);
872 type
= removeattr(text
->type
);
874 findex
= (style
== word_Normal ? FONT_NORMAL
:
875 style
== word_Emph ? FONT_EMPH
:
878 if (type
== word_Normal
) {
880 } else if (type
== word_WhiteSpace
) {
881 x
+= pdata
->sizes
[findex
] *
882 string_width(pdata
->fonts
[findex
], L
" ", NULL
);
883 if (nspaces
&& findex
!= FONT_CODE
) {
884 x
+= (*nspace
+1) * shortfall
/ nspaces
;
885 x
-= *nspace
* shortfall
/ nspaces
;
889 } else /* if (type == word_Quote) */ {
890 if (text
->aux
== quote_Open
)
891 str
= L
"\x2018"; /* FIXME: configurability! */
893 str
= L
"\x2019"; /* FIXME: configurability! */
896 (void) string_width(pdata
->fonts
[findex
], str
, &errs
);
898 if (errs
&& text
->alt
)
899 x
= render_text(page
, pdata
, ldata
, x
, y
, text
->alt
, NULL
,
900 xr
, shortfall
, nspaces
, nspace
, keywords
);
902 x
= render_string(page
, pdata
->fonts
[findex
],
903 pdata
->sizes
[findex
], x
, y
, str
);
915 static void render_line(line_data
*ldata
, int left_x
, int top_y
,
916 xref_dest
*dest
, keywordlist
*keywords
)
921 if (ldata
->aux_text
) {
924 render_text(ldata
->page
, ldata
->pdata
, ldata
,
925 left_x
+ ldata
->aux_left_indent
,
927 ldata
->aux_text
, NULL
, &xr
, 0, 0, &nspace
, keywords
);
932 * There might be a cross-reference carried over from a
935 if (dest
->type
!= NONE
) {
937 xr
->dest
= *dest
; /* structure copy */
938 if (ldata
->page
->last_xref
)
939 ldata
->page
->last_xref
->next
= xr
;
941 ldata
->page
->first_xref
= xr
;
942 ldata
->page
->last_xref
= xr
;
943 xr
->lx
= xr
->rx
= left_x
+ ldata
->xpos
;
944 xr
->by
= top_y
- ldata
->ypos
;
945 xr
->ty
= top_y
- ldata
->ypos
+ ldata
->line_height
;
949 render_text(ldata
->page
, ldata
->pdata
, ldata
, left_x
+ ldata
->xpos
,
950 top_y
- ldata
->ypos
, ldata
->first
, ldata
->end
, &xr
,
951 ldata
->hshortfall
, ldata
->nspaces
, &nspace
, keywords
);
955 * There's a cross-reference continued on to the next line.