2 * Paper printing pre-backend for Halibut.
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
15 * - set up contents section now we know what sections begin on
18 * - do cross-reference rectangles
22 * - all the missing features in text rendering (code paragraphs,
23 * list bullets, indentation, section heading styles)
27 * That should bring us to the same level of functionality that
28 * original-Halibut had, and the same in PDF plus the obvious
29 * interactive navigation features. After that, in future work:
31 * - linearised PDF, perhaps?
33 * - I'm uncertain of whether I need to include a ToUnicode CMap
34 * in each of my font definitions in PDF. Currently things (by
35 * which I mean cut and paste out of acroread) seem to be
36 * working fairly happily without it, but I don't know.
49 static font_data
*make_std_font(font_list
*fontlist
, char const *name
);
50 static void wrap_paragraph(para_data
*pdata
, word
*words
,
51 int w
, int i1
, int i2
);
/*
 * Forward declaration of page_breaks().
 * NOTE(review): the declaration is cut off after the second parameter in
 * this extract. The call site in paper_pre_backend() passes a third
 * argument (page_height), so at least one more parameter follows.
 */
52 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
54 static void render_line(line_data
*ldata
, int left_x
, int top_y
);
/*
 * paper_pre_backend: lay out a whole document for the paper back ends.
 *
 * NOTE(review): this copy of the function is extraction residue. Every
 * surviving line starts with its original line number, identifiers are
 * split one per line, and many original lines (braces, case labels,
 * declarations, the return) are absent. The code text below is untouched.
 *
 * What the visible statements do, in order:
 *  - define page geometry and spacing as local ints, each a multiple of 4096;
 *  - allocate a font_list and create the Times-Roman, Times-Italic and
 *    Courier fonts through make_std_font();
 *  - walk the paragraph list starting at sourceform, adjusting indent for
 *    the list and quote indent paragraph types and, for paragraph types
 *    that need wrapping, allocating a para_data, choosing fonts and sizes
 *    and calling wrap_paragraph();
 *  - set space_before, space_after and page_break on every resulting line;
 *  - chain the lines of all paragraphs into one doubly linked list;
 *  - call page_breaks() on that list;
 *  - call render_line() for each line of each paragraph;
 *  - allocate a document and store the font list and paper size in it.
 */
56 void *paper_pre_backend(paragraph
*sourceform
, keywordlist
*keywords
,
60 int indent
, extra_indent
, firstline_indent
;
62 line_data
*ldata
, *firstline
, *lastline
;
63 font_data
*tr
, *ti
, *cr
;
68 * FIXME: All these things ought to become configurable.
/* Page geometry and spacing; every value is a multiple of 4096. */
70 int paper_width
= 595 * 4096;
71 int paper_height
= 841 * 4096;
72 int left_margin
= 72 * 4096;
73 int top_margin
= 72 * 4096;
74 int right_margin
= 72 * 4096;
75 int bottom_margin
= 108 * 4096;
76 int indent_list_bullet
= 6 * 4096;
77 int indent_list
= 24 * 4096;
78 int indent_quote
= 18 * 4096;
79 int base_leading
= 4096;
80 int base_para_spacing
= 10 * 4096;
/* Usable text area: paper size minus the margins on each axis. */
82 int base_width
= paper_width
- left_margin
- right_margin
;
83 int page_height
= paper_height
- top_margin
- bottom_margin
;
85 IGNORE(keywords
); /* FIXME */
86 IGNORE(idx
); /* FIXME */
87 IGNORE(indent_list_bullet
); /* FIXME */
90 * First, set up some font structures.
92 fontlist
= mknew(font_list
);
93 fontlist
->head
= fontlist
->tail
= NULL
;
94 tr
= make_std_font(fontlist
, "Times-Roman");
95 ti
= make_std_font(fontlist
, "Times-Italic");
96 cr
= make_std_font(fontlist
, "Courier");
99 * Go through and break up each paragraph into lines.
102 firstline
= lastline
= NULL
;
103 for (p
= sourceform
; p
; p
= p
->next
) {
104 p
->private_data
= NULL
;
108 * These paragraph types are either invisible or don't
109 * define text in the normal sense. Either way, they
110 * don't require wrapping.
116 case para_NotParaType
:
123 * These paragraph types don't require wrapping, but
124 * they do affect the line width to which we wrap the
125 * rest of the paragraphs, so we need to pay attention.
128 indent
+= indent_list
; break;
130 indent
-= indent_list
; assert(indent
>= 0); break;
132 indent
+= indent_quote
; break;
134 indent
-= indent_quote
; assert(indent
>= 0); break;
137 * This paragraph type is special. Process it
145 * All of these paragraph types require wrapping in the
146 * ordinary way. So we must supply a set of fonts, a
147 * line width and auxiliary information (e.g. bullet
148 * text) for each one.
152 case para_UnnumberedChapter
:
156 case para_BiblioCited
:
158 case para_NumberedList
:
159 case para_DescribedThing
:
160 case para_Description
:
163 pdata
= mknew(para_data
);
166 * FIXME: Subsidiary switch on paragraph type to decide
167 * what font set to use for this paragraph.
169 pdata
->fonts
[FONT_NORMAL
] = tr
;
170 pdata
->sizes
[FONT_NORMAL
] = 12;
171 pdata
->fonts
[FONT_EMPH
] = ti
;
172 pdata
->sizes
[FONT_EMPH
] = 12;
173 pdata
->fonts
[FONT_CODE
] = cr
;
174 pdata
->sizes
[FONT_CODE
] = 12;
177 * FIXME: Also select an indentation level depending on
178 * the paragraph type (list paragraphs other than
179 * para_DescribedThing need extra indent).
181 * Perhaps at some point we might even arrange for the
182 * user to be able to request indented first lines in
186 firstline_indent
= 0;
188 wrap_paragraph(pdata
, p
->words
, base_width
,
189 indent
+ firstline_indent
,
190 indent
+ extra_indent
);
193 * FIXME: Also find the auxiliary data for this
194 * paragraph. For para_Bullet it's a bullet; for
195 * para_NumberedList it's the number; for some section
196 * headings (depending on the style of section heading
197 * selected) it's the section number.
199 * Assign into pdata->first->aux_*.
202 p
->private_data
= pdata
;
205 * Set the line spacing for each line in this paragraph.
207 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
208 if (ldata
== pdata
->first
)
209 ldata
->space_before
= base_para_spacing
/ 2;
211 ldata
->space_before
= base_leading
/ 2;
212 if (ldata
== pdata
->last
)
213 ldata
->space_after
= base_para_spacing
/ 2;
215 ldata
->space_after
= base_leading
/ 2;
216 ldata
->page_break
= FALSE
;
220 * FIXME: some kinds of section heading do require a
221 * page break before them.
228 * Link all line structures together into a big list.
230 if (p
->private_data
) {
231 pdata
= (para_data
*)p
->private_data
;
234 lastline
->next
= pdata
->first
;
235 pdata
->first
->prev
= lastline
;
237 firstline
= pdata
->first
;
238 pdata
->first
->prev
= NULL
;
240 lastline
= pdata
->last
;
246 * Now we have an enormous linked list of every line of text in
247 * the document. Break it up into pages.
249 pages
= page_breaks(firstline
, lastline
, page_height
);
252 * Now we're ready to actually lay out the pages. We do this by
253 * looping over _paragraphs_, since we may need to track cross-
254 * references between lines and even across pages.
256 for (p
= sourceform
; p
; p
= p
->next
) {
257 pdata
= (para_data
*)p
->private_data
;
260 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
261 render_line(ldata
, left_margin
, paper_height
- top_margin
);
262 if (ldata
== pdata
->last
)
268 doc
= mknew(document
);
269 doc
->fonts
= fontlist
;
271 doc
->paper_width
= paper_width
;
272 doc
->paper_height
= paper_height
;
/*
 * new_font_encoding: allocate a font_encoding for the given font.
 *
 * Visible behaviour: the new encoding is linked onto font->list through
 * its head and tail pointers, and for each of the 256 positions the
 * vector entry is set to NULL and the to_unicode entry to 0xFFFF.
 *
 * NOTE(review): several original lines are missing from this extract
 * (including whatever separates the tail->next and head assignments, any
 * other field initialisation, and the return statement).
 */
276 static font_encoding
*new_font_encoding(font_data
*font
)
281 fe
= mknew(font_encoding
);
284 if (font
->list
->tail
)
285 font
->list
->tail
->next
= fe
;
287 font
->list
->head
= fe
;
288 font
->list
->tail
= fe
;
293 for (i
= 0; i
< 256; i
++) {
294 fe
->vector
[i
] = NULL
;
296 fe
->to_unicode
[i
] = 0xFFFF;
/*
 * make_std_font: build a font_data for the standard font called name.
 *
 * Visible behaviour:
 *  - fetches the width table with ps_std_font_widths(name);
 *  - counts the entries of ps_std_glyphs up to its NULL terminator;
 *  - allocates the font_data and a subfont_map with one entry per glyph;
 *  - creates a first encoding with new_font_encoding() and sets its
 *    free_pos to 0xA1;
 *  - for every glyph whose ps_glyph_to_unicode() value lies in 0x20..0x7E,
 *    places it in that first encoding at the position equal to its code
 *    point; every other glyph gets subfont NULL and position 0.
 *
 * NOTE(review): several original lines are missing from this extract,
 * among them the body of the loop over f->bmp, the else between the two
 * subfont_map assignments, and the return statement.
 */
302 static font_data
*make_std_font(font_list
*fontlist
, char const *name
)
310 widths
= ps_std_font_widths(name
);
314 for (nglyphs
= 0; ps_std_glyphs
[nglyphs
] != NULL
; nglyphs
++);
316 f
= mknew(font_data
);
320 f
->nglyphs
= nglyphs
;
321 f
->glyphs
= ps_std_glyphs
;
323 f
->subfont_map
= mknewa(subfont_map_entry
, nglyphs
);
326 * Our first subfont will contain all of US-ASCII. This isn't
327 * really necessary - we could just create custom subfonts
328 * precisely as the whim of render_string dictated - but
329 * instinct suggests that it might be nice to have the text in
330 * the output files look _marginally_ recognisable.
332 fe
= new_font_encoding(f
);
333 fe
->free_pos
= 0xA1; /* only the top half is free */
334 f
->latest_subfont
= fe
;
336 for (i
= 0; i
< (int)lenof(f
->bmp
); i
++)
339 for (i
= 0; i
< nglyphs
; i
++) {
341 ucs
= ps_glyph_to_unicode(f
->glyphs
[i
]);
342 assert(ucs
!= 0xFFFF);
344 if (ucs
>= 0x20 && ucs
<= 0x7E) {
345 fe
->vector
[ucs
] = f
->glyphs
[i
];
346 fe
->indices
[ucs
] = i
;
347 fe
->to_unicode
[ucs
] = ucs
;
348 f
->subfont_map
[i
].subfont
= fe
;
349 f
->subfont_map
[i
].position
= ucs
;
352 * This character is not yet assigned to a subfont.
354 f
->subfont_map
[i
].subfont
= NULL
;
355 f
->subfont_map
[i
].position
= 0;
/*
 * string_width: add up font->widths[] over the characters of a wide string.
 *
 * Each character is cast to unsigned short and looked up in font->bmp to
 * obtain a glyph index; the index value 0xFFFF is tested for separately.
 *
 * NOTE(review): the handling of the 0xFFFF case, the use of errs and the
 * return statement are not visible in this extract. Callers in this file
 * pass either NULL or the address of an int that they test afterwards, so
 * errs presumably reports unrepresentable characters - confirm.
 */
362 static int string_width(font_data
*font
, wchar_t const *string
, int *errs
)
369 for (; *string
; string
++) {
372 index
= font
->bmp
[(unsigned short)*string
];
373 if (index
== 0xFFFF) {
377 width
+= font
->widths
[index
];
384 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
);
/*
 * Context handed to the paper_width* functions through their vctx argument.
 * NOTE(review): the member list is not visible in this extract; code in
 * this file reads ctx->minspacewidth and ctx->pdata.
 */
386 struct paper_width_ctx
{
/*
 * paper_width_list: accumulate paper_width_internal() over a run of words,
 * starting at text and stopping at end or at a null pointer. nspaces is
 * passed straight through to paper_width_internal().
 *
 * NOTE(review): the initialisation of w, the step that advances text and
 * the return statement are not visible in this extract.
 */
391 static int paper_width_list(void *vctx
, word
*text
, word
*end
, int *nspaces
) {
393 while (text
&& text
!= end
) {
394 w
+= paper_width_internal(vctx
, text
, nspaces
);
/*
 * paper_width_internal: width of a single word under the fonts of the
 * paragraph stored in the paper_width_ctx passed as vctx.
 *
 * Visible behaviour:
 *  - splits word->type into a style (towordstyle) and a type (removeattr)
 *    and picks a font index from the style;
 *  - for white space, one visible path returns ctx->minspacewidth;
 *  - for quotes, uses U+2018 when word->aux is quote_Open and U+2019 in
 *    the other visible assignment;
 *  - measures the chosen string with string_width(); if that reported
 *    errors and the word has an alt list, returns the width of the alt
 *    list instead; otherwise returns the font size times the width.
 *
 * NOTE(review): the cases of the leading switch, the tail of the font
 * index expression and the plain-text branch are not visible here.
 */
400 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
)
402 struct paper_width_ctx
*ctx
= (struct paper_width_ctx
*)vctx
;
403 int style
, type
, findex
, width
, errs
;
406 switch (word
->type
) {
416 style
= towordstyle(word
->type
);
417 type
= removeattr(word
->type
);
419 findex
= (style
== word_Normal ? FONT_NORMAL
:
420 style
== word_Emph ? FONT_EMPH
:
423 if (type
== word_Normal
) {
425 } else if (type
== word_WhiteSpace
) {
426 if (findex
!= FONT_CODE
) {
429 return ctx
->minspacewidth
;
432 } else /* if (type == word_Quote) */ {
433 if (word
->aux
== quote_Open
)
434 str
= L
"\x2018"; /* FIXME: configurability! */
436 str
= L
"\x2019"; /* FIXME: configurability! */
439 width
= string_width(ctx
->pdata
->fonts
[findex
], str
, &errs
);
441 if (errs
&& word
->alt
)
442 return paper_width_list(vctx
, word
->alt
, NULL
, nspaces
);
444 return ctx
->pdata
->sizes
[findex
] * width
;
447 static int paper_width(void *vctx
, word
*word
)
449 return paper_width_internal(vctx
, word
, NULL
);
/*
 * wrap_paragraph: break the word list of one paragraph into lines and
 * attach the resulting line_data list to pdata (pdata->first, pdata->last).
 *
 * Parameters as used by the visible code:
 *  - w is the overall width; the first line is wrapped to w - i1 and the
 *    later lines to w - i2;
 *  - i1 and i2 also become the xpos of the first and the later lines.
 *
 * Visible behaviour:
 *  - line_height is raised to the largest entry of pdata->sizes;
 *  - the natural space width is the FONT_NORMAL size times the width of a
 *    single space, with a fallback of size * 4096 / 2 if that is zero;
 *  - the minimum space width stored in the context is 3/5 of the natural;
 *  - wrap_para() does the breaking, using paper_width as the callback;
 *  - for every wrapped line a line_data is created and its hshortfall is
 *    computed relative to the natural space width; a positive shortfall
 *    on the final line is reset to zero.
 *
 * NOTE(review): a number of original lines are missing from this extract
 * (for example the rest of the context set-up and the if/else around the
 * list linking).
 */
452 static void wrap_paragraph(para_data
*pdata
, word
*words
,
453 int w
, int i1
, int i2
)
455 wrappedline
*wrapping
, *p
;
457 struct paper_width_ctx ctx
;
461 * We're going to need to store the line height in every line
462 * structure we generate.
467 for (i
= 0; i
< NFONTS
; i
++)
468 if (line_height
< pdata
->sizes
[i
])
469 line_height
= pdata
->sizes
[i
];
473 spacewidth
= (pdata
->sizes
[FONT_NORMAL
] *
474 string_width(pdata
->fonts
[FONT_NORMAL
], L
" ", NULL
));
475 if (spacewidth
== 0) {
477 * A font without a space?! Disturbing. I hope this never
478 * comes up, but I'll make a random guess anyway and set my
479 * space width to half the point size.
481 spacewidth
= pdata
->sizes
[FONT_NORMAL
] * 4096 / 2;
485 * I'm going to set the _minimum_ space width to 3/5 of the
486 * standard one, and use the standard one as the optimum.
488 ctx
.minspacewidth
= spacewidth
* 3 / 5;
491 wrapping
= wrap_para(words
, w
- i1
, w
- i2
, paper_width
, &ctx
, spacewidth
);
494 * Having done the wrapping, we now concoct a set of line_data
497 pdata
->first
= pdata
->last
= NULL
;
499 for (p
= wrapping
; p
; p
= p
->next
) {
502 int len
, wid
, spaces
;
504 ldata
= mknew(line_data
);
506 ldata
->pdata
= pdata
;
507 ldata
->first
= p
->begin
;
509 ldata
->line_height
= line_height
;
511 ldata
->xpos
= (p
== wrapping ? i1
: i2
);
514 pdata
->last
->next
= ldata
;
515 ldata
->prev
= pdata
->last
;
517 pdata
->first
= ldata
;
524 len
= paper_width_list(&ctx
, ldata
->first
, ldata
->end
, &spaces
);
525 wid
= (p
== wrapping ? w
- i1
: w
- i2
);
528 ldata
->hshortfall
= wid
- len
;
529 ldata
->nspaces
= spaces
;
531 * This tells us how much the space width needs to
532 * change from _min_spacewidth. But we want to store
533 * its difference from the _natural_ space width, to
534 * make the text rendering easier.
536 ldata
->hshortfall
+= ctx
.minspacewidth
* spaces
;
537 ldata
->hshortfall
-= spacewidth
* spaces
;
539 * Special case: on the last line of a paragraph, we
540 * never stretch spaces.
542 if (ldata
->hshortfall
> 0 && !p
->next
)
543 ldata
->hshortfall
= 0;
545 ldata
->aux_text
= NULL
;
546 ldata
->aux_left_indent
= 0;
/*
 * page_breaks: split the document-wide line list into pages.
 *
 * Visible behaviour:
 *  - first pass, walking backwards from last via prev: for each starting
 *    line l, extend a candidate page forwards line by line, stopping at a
 *    line flagged page_break or once the accumulated text height plus
 *    spacing exceeds page_height; each candidate is costed and the
 *    cheapest is recorded on l (bestcost, vshortfall);
 *  - second pass, walking forwards: allocate a page_data per page, record
 *    its first and last line, and give every line on the page a ypos that
 *    spreads the vertical shortfall of the page across the inter-line
 *    space.
 *
 * NOTE(review): the final parameter of the signature and many body lines
 * are missing from this extract. The call site passes page_height as the
 * third argument and the body reads page_height.
 */
551 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
558 * Page breaking is done by a close analogue of the optimal
559 * paragraph wrapping algorithm used by wrap_para(). We work
560 * backwards from the end of the document line by line; for
561 * each line, we contemplate every possible number of lines we
562 * could put on a page starting with that line, determine a
563 * cost function for each one, add it to the pre-computed cost
564 * function for optimally page-breaking everything after that
565 * page, and pick the best option.
567 * Since my line_data structures are only used for this
568 * purpose, I might as well just store the algorithm data
572 for (l
= last
; l
; l
= l
->prev
) {
573 int minheight
, text
= 0, space
= 0;
577 for (m
= l
; m
; m
= m
->next
) {
578 if (m
!= l
&& m
->page_break
)
579 break; /* we've gone as far as we can */
582 space
+= m
->prev
->space_after
;
583 if (m
!= l
|| m
->page_break
)
584 space
+= m
->space_before
;
585 text
+= m
->line_height
;
586 minheight
= text
+ space
;
588 if (m
!= l
&& minheight
> page_height
)
592 * Compute the cost of this arrangement, as the square
593 * of the amount of wasted space on the page.
594 * Exception: if this is the last page before a
595 * mandatory break or the document end, we don't
596 * penalise a large blank area.
598 if (m
->next
&& !m
->next
->page_break
)
600 int x
= page_height
- minheight
;
607 cost
+= (x
* xf
) >> 8;
612 * FIXME: here I should introduce penalties for
613 * breaking in mid-paragraph, particularly very close
614 * to one end of a paragraph and particularly in code
618 if (m
->next
&& !m
->next
->page_break
)
619 cost
+= m
->next
->bestcost
;
621 if (l
->bestcost
== -1 || l
->bestcost
> cost
) {
623 * This is the best option yet for this starting
627 if (m
->next
&& !m
->next
->page_break
)
628 l
->vshortfall
= page_height
- minheight
;
639 * Now go through the line list forwards and assemble the
649 page
= mknew(page_data
);
658 page
->first_line
= l
;
659 page
->last_line
= l
->page_last
;
661 page
->first_text
= page
->last_text
= NULL
;
664 * Now assign a y-coordinate to each line on the page.
667 for (l
= page
->first_line
; l
; l
= l
->next
) {
668 if (l
!= page
->first_line
)
669 space
+= l
->prev
->space_after
;
670 if (l
!= page
->first_line
|| l
->page_break
)
671 space
+= l
->space_before
;
672 text
+= l
->line_height
;
675 l
->ypos
= text
+ space
+
676 space
* (float)page
->first_line
->vshortfall
/
677 page
->first_line
->space
;
679 if (l
== page
->last_line
)
683 l
= page
->last_line
->next
;
/*
 * add_string_to_page: append one text fragment to a page.
 *
 * Visible behaviour: allocates a text_fragment, links it into the list
 * headed by page->first_text and page->last_text, stores size as the
 * fragment fontsize and stores a dupstr() copy of text.
 *
 * NOTE(review): the lines that would store x, y and fe, and the if/else
 * around the list linking, are not visible in this extract.
 */
689 static void add_string_to_page(page_data
*page
, int x
, int y
,
690 font_encoding
*fe
, int size
, char *text
)
694 frag
= mknew(text_fragment
);
698 page
->last_text
->next
= frag
;
700 page
->first_text
= frag
;
701 page
->last_text
= frag
;
706 frag
->fontsize
= size
;
707 frag
->text
= dupstr(text
);
711 * Returns the updated x coordinate.
/*
 * render_string: place a wide string on a page as one or more fragments.
 *
 * Visible behaviour:
 *  - allocates a char buffer of 1 + ustrlen(str) bytes;
 *  - maps each character to a glyph through font->bmp;
 *  - if the glyph has no subfont yet, gives it the next free position in
 *    font->latest_subfont, first creating a new encoding when free_pos has
 *    reached 0x100, and moving free_pos from 0x7F to 0xA1 when it gets
 *    there;
 *  - whenever the subfont of the current glyph differs from the one being
 *    collected, the buffered bytes are emitted with add_string_to_page();
 *  - appends the position of the glyph to the buffer and adds its width
 *    times fontsize to the running text width;
 *  - emits whatever is left in the buffer at the end.
 *
 * NOTE(review): the loop header, the conditions guarding the continue and
 * the subfont assignment, the x update and the return statement are not
 * visible in this extract.
 */
713 static int render_string(page_data
*page
, font_data
*font
, int fontsize
,
714 int x
, int y
, wchar_t *str
)
717 int textpos
, textwid
, glyph
;
718 font_encoding
*subfont
= NULL
, *sf
;
720 text
= mknewa(char, 1 + ustrlen(str
));
721 textpos
= textwid
= 0;
724 glyph
= font
->bmp
[*str
];
727 continue; /* nothing more we can do here */
730 * Find which subfont this character is going in.
732 sf
= font
->subfont_map
[glyph
].subfont
;
738 * This character is not yet in a subfont. Assign one.
740 if (font
->latest_subfont
->free_pos
>= 0x100)
741 font
->latest_subfont
= new_font_encoding(font
);
743 c
= font
->latest_subfont
->free_pos
++;
744 if (font
->latest_subfont
->free_pos
== 0x7F)
745 font
->latest_subfont
->free_pos
= 0xA1;
747 font
->subfont_map
[glyph
].subfont
= font
->latest_subfont
;
748 font
->subfont_map
[glyph
].position
= c
;
749 font
->latest_subfont
->vector
[c
] = font
->glyphs
[glyph
];
750 font
->latest_subfont
->indices
[c
] = glyph
;
751 font
->latest_subfont
->to_unicode
[c
] = *str
;
753 sf
= font
->latest_subfont
;
756 if (!subfont
|| sf
!= subfont
) {
758 text
[textpos
] = '\0';
759 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
762 assert(textpos
== 0);
768 text
[textpos
++] = font
->subfont_map
[glyph
].position
;
769 textwid
+= font
->widths
[glyph
] * fontsize
;
775 text
[textpos
] = '\0';
776 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
784 * Returns the updated x coordinate.
/*
 * render_text: render the words from text up to text_end (or a null
 * pointer) onto a page, starting at (x, y).
 *
 * Visible behaviour:
 *  - splits each word type into style and type and picks a font index
 *    from the style, as paper_width_internal() does;
 *  - for white space, advances x by the font size times the width of one
 *    space; when nspaces is non-zero and the font is not FONT_CODE it also
 *    adds this space share of shortfall, computed as the difference of
 *    two running fractions (*nspace + 1 and *nspace over nspaces);
 *  - for quotes, uses U+2018 when text->aux is quote_Open and U+2019 in
 *    the other visible assignment;
 *  - checks the string with string_width(); if that reported errors and
 *    the word has an alt list, renders the alt list recursively, and the
 *    other visible path draws the string with render_string().
 *
 * NOTE(review): the cases of the leading switch, the plain-text branch,
 * the update of *nspace, the list advance and the return statement are
 * not visible in this extract.
 */
786 static int render_text(page_data
*page
, para_data
*pdata
, int x
, int y
,
787 word
*text
, word
*text_end
,
788 int shortfall
, int nspaces
, int *nspace
)
790 while (text
&& text
!= text_end
) {
791 int style
, type
, findex
, errs
;
794 switch (text
->type
) {
803 * FIXME: we should do something with all of these!
804 * Hyperlinks and xrefs have meaning in PDF, and this
805 * is probably the right place to nail down the index
810 style
= towordstyle(text
->type
);
811 type
= removeattr(text
->type
);
813 findex
= (style
== word_Normal ? FONT_NORMAL
:
814 style
== word_Emph ? FONT_EMPH
:
817 if (type
== word_Normal
) {
819 } else if (type
== word_WhiteSpace
) {
820 x
+= pdata
->sizes
[findex
] *
821 string_width(pdata
->fonts
[findex
], L
" ", NULL
);
822 if (nspaces
&& findex
!= FONT_CODE
) {
823 x
+= (*nspace
+1) * shortfall
/ nspaces
;
824 x
-= *nspace
* shortfall
/ nspaces
;
828 } else /* if (type == word_Quote) */ {
829 if (text
->aux
== quote_Open
)
830 str
= L
"\x2018"; /* FIXME: configurability! */
832 str
= L
"\x2019"; /* FIXME: configurability! */
835 (void) string_width(pdata
->fonts
[findex
], str
, &errs
);
837 if (errs
&& text
->alt
)
838 x
= render_text(page
, pdata
, x
, y
, text
->alt
, NULL
,
839 shortfall
, nspaces
, nspace
);
841 x
= render_string(page
, pdata
->fonts
[findex
],
842 pdata
->sizes
[findex
], x
, y
, str
);
/*
 * render_line: draw one laid-out line on its page.
 *
 * left_x and top_y give the origin; the vertical position used for the
 * text is top_y minus the ypos of the line.
 *
 * Visible behaviour: if the line has aux_text, that is rendered first at
 * left_x + aux_left_indent with zero shortfall and zero spaces; then the
 * words from ldata->first to ldata->end are rendered at left_x + xpos
 * with the hshortfall and nspaces stored on the line.
 *
 * NOTE(review): the declaration and initialisation of nspace are not
 * visible in this extract.
 */
851 static void render_line(line_data
*ldata
, int left_x
, int top_y
)
854 if (ldata
->aux_text
) {
856 render_text(ldata
->page
, ldata
->pdata
, left_x
+ ldata
->aux_left_indent
,
857 top_y
- ldata
->ypos
, ldata
->aux_text
, NULL
, 0, 0, &nspace
);
860 render_text(ldata
->page
, ldata
->pdata
, left_x
+ ldata
->xpos
,
861 top_y
- ldata
->ypos
, ldata
->first
, ldata
->end
,
862 ldata
->hshortfall
, ldata
->nspaces
, &nspace
);