2 * Paper printing pre-backend for Halibut.
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
15 * - set up contents section now we know what sections begin on
20 * - header/footer? Page numbers at least would be handy. Fully
21 * configurable footer can wait, though.
23 * That should bring us to the same level of functionality that
24 * original-Halibut had, and the same in PDF plus the obvious
25 * interactive navigation features. After that, in future work:
27 * - linearised PDF, perhaps?
29 * - I'm uncertain of whether I need to include a ToUnicode CMap
30 * in each of my font definitions in PDF. Currently things (by
31 * which I mean cut and paste out of acroread) seem to be
32 * working fairly happily without it, but I don't know.
45 typedef struct paper_conf_Tag paper_conf
;
47 struct paper_conf_Tag
{
54 int indent_list_bullet
;
58 int base_para_spacing
;
59 int chapter_top_space
;
60 int sect_num_left_space
;
61 int chapter_underline_depth
;
62 int chapter_underline_thickness
;
65 /* These are derived from the above */
68 /* Fonts used in the configuration */
69 font_data
*tr
, *ti
, *hr
, *hi
, *cr
, *co
, *cb
;
72 static font_data
*make_std_font(font_list
*fontlist
, char const *name
);
73 static void wrap_paragraph(para_data
*pdata
, word
*words
,
74 int w
, int i1
, int i2
);
75 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
77 static void render_line(line_data
*ldata
, int left_x
, int top_y
,
78 xref_dest
*dest
, keywordlist
*keywords
);
79 static int paper_width_simple(para_data
*pdata
, word
*text
);
80 static para_data
*code_paragraph(int indent
, word
*words
, paper_conf
*conf
);
81 static para_data
*rule_paragraph(int indent
, paper_conf
*conf
);
82 static void add_rect_to_page(page_data
*page
, int x
, int y
, int w
, int h
);
83 static para_data
*make_para_data(int ptype
, int paux
, int indent
,
84 word
*pkwtext
, word
*pkwtext2
, word
*pwords
,
86 static void standard_line_spacing(para_data
*pdata
, paper_conf
*conf
);
87 static wchar_t *prepare_outline_title(word
*first
, wchar_t *separator
,
90 void *paper_pre_backend(paragraph
*sourceform
, keywordlist
*keywords
,
95 para_data
*pdata
, *firstpara
= NULL
, *lastpara
= NULL
;
96 line_data
*ldata
, *firstline
, *lastline
;
102 * FIXME: All these things ought to become configurable.
104 conf
= mknew(paper_conf
);
105 conf
->paper_width
= 595 * 4096;
106 conf
->paper_height
= 841 * 4096;
107 conf
->left_margin
= 72 * 4096;
108 conf
->top_margin
= 72 * 4096;
109 conf
->right_margin
= 72 * 4096;
110 conf
->bottom_margin
= 108 * 4096;
111 conf
->indent_list_bullet
= 6 * 4096;
112 conf
->indent_list
= 24 * 4096;
113 conf
->indent_quote
= 18 * 4096;
114 conf
->base_leading
= 4096;
115 conf
->base_para_spacing
= 10 * 4096;
116 conf
->chapter_top_space
= 72 * 4096;
117 conf
->sect_num_left_space
= 12 * 4096;
118 conf
->chapter_underline_depth
= 14 * 4096;
119 conf
->chapter_underline_thickness
= 3 * 4096;
120 conf
->rule_thickness
= 1 * 4096;
121 conf
->base_font_size
= 12;
124 conf
->paper_width
- conf
->left_margin
- conf
->right_margin
;
126 conf
->paper_height
- conf
->top_margin
- conf
->bottom_margin
;
128 IGNORE(idx
); /* FIXME */
131 * First, set up some font structures.
133 fontlist
= mknew(font_list
);
134 fontlist
->head
= fontlist
->tail
= NULL
;
135 conf
->tr
= make_std_font(fontlist
, "Times-Roman");
136 conf
->ti
= make_std_font(fontlist
, "Times-Italic");
137 conf
->hr
= make_std_font(fontlist
, "Helvetica-Bold");
138 conf
->hi
= make_std_font(fontlist
, "Helvetica-BoldOblique");
139 conf
->cr
= make_std_font(fontlist
, "Courier");
140 conf
->co
= make_std_font(fontlist
, "Courier-Oblique");
141 conf
->cb
= make_std_font(fontlist
, "Courier-Bold");
144 * Go through and break up each paragraph into lines.
147 firstline
= lastline
= NULL
;
148 for (p
= sourceform
; p
; p
= p
->next
) {
149 p
->private_data
= NULL
;
153 * These paragraph types are either invisible or don't
154 * define text in the normal sense. Either way, they
155 * don't require wrapping.
160 case para_NotParaType
:
167 * These paragraph types don't require wrapping, but
168 * they do affect the line width to which we wrap the
169 * rest of the paragraphs, so we need to pay attention.
172 indent
+= conf
->indent_list
; break;
174 indent
-= conf
->indent_list
; assert(indent
>= 0); break;
176 indent
+= conf
->indent_quote
; break;
178 indent
-= conf
->indent_quote
; assert(indent
>= 0); break;
181 * This paragraph type is special. Process it
185 pdata
= code_paragraph(indent
, p
->words
, conf
);
186 p
->private_data
= pdata
;
187 if (pdata
->first
!= pdata
->last
) {
188 pdata
->first
->penalty_after
+= 100000;
189 pdata
->last
->penalty_before
+= 100000;
194 * This paragraph is also special.
197 pdata
= rule_paragraph(indent
, conf
);
198 p
->private_data
= pdata
;
202 * All of these paragraph types require wrapping in the
203 * ordinary way. So we must supply a set of fonts, a
204 * line width and auxiliary information (e.g. bullet
205 * text) for each one.
209 case para_UnnumberedChapter
:
213 case para_BiblioCited
:
215 case para_NumberedList
:
216 case para_DescribedThing
:
217 case para_Description
:
220 pdata
= make_para_data(p
->type
, p
->aux
, indent
,
221 p
->kwtext
, p
->kwtext2
, p
->words
, conf
);
223 p
->private_data
= pdata
;
228 if (p
->private_data
) {
229 pdata
= (para_data
*)p
->private_data
;
232 * Link all line structures together into a big list.
236 lastline
->next
= pdata
->first
;
237 pdata
->first
->prev
= lastline
;
239 firstline
= pdata
->first
;
240 pdata
->first
->prev
= NULL
;
242 lastline
= pdata
->last
;
246 * Link all paragraph structures together similarly.
250 lastpara
->next
= pdata
;
258 * Now we have an enormous linked list of every line of text in
259 * the document. Break it up into pages.
261 pages
= page_breaks(firstline
, lastline
, conf
->page_height
);
264 * Now we're ready to actually lay out the pages. We do this by
265 * looping over _paragraphs_, since we may need to track cross-
266 * references between lines and even across pages.
268 for (pdata
= firstpara
; pdata
; pdata
= pdata
->next
) {
271 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
272 render_line(ldata
, conf
->left_margin
,
273 conf
->paper_height
- conf
->top_margin
,
275 if (ldata
== pdata
->last
)
280 * Render any rectangle (chapter title underline or rule)
281 * that goes with this paragraph.
283 switch (pdata
->rect_type
) {
284 case RECT_CHAPTER_UNDERLINE
:
285 add_rect_to_page(pdata
->last
->page
,
287 (conf
->paper_height
- conf
->top_margin
-
289 conf
->chapter_underline_depth
),
291 conf
->chapter_underline_thickness
);
294 add_rect_to_page(pdata
->first
->page
,
295 conf
->left_margin
+ pdata
->first
->xpos
,
296 (conf
->paper_height
- conf
->top_margin
-
298 pdata
->last
->line_height
),
299 conf
->base_width
- pdata
->first
->xpos
,
300 pdata
->last
->line_height
);
302 default: /* placate gcc */
308 * Start putting together the overall document structure we're
311 doc
= mknew(document
);
312 doc
->fonts
= fontlist
;
314 doc
->paper_width
= conf
->paper_width
;
315 doc
->paper_height
= conf
->paper_height
;
318 * Collect the section heading paragraphs into a document
319 * outline. This is slightly fiddly because the Title paragraph
320 * isn't required to be at the start, although all the others
326 doc
->outline_elements
= mknewa(outline_element
, osize
);
327 doc
->n_outline_elements
= 0;
329 /* First find the title. */
330 for (pdata
= firstpara
; pdata
; pdata
= pdata
->next
) {
331 if (pdata
->outline_level
== 0) {
332 doc
->outline_elements
[0].level
= 0;
333 doc
->outline_elements
[0].pdata
= pdata
;
334 doc
->n_outline_elements
++;
339 /* Then collect the rest. */
340 for (pdata
= firstpara
; pdata
; pdata
= pdata
->next
) {
341 if (pdata
->outline_level
> 0) {
342 if (doc
->n_outline_elements
>= osize
) {
344 doc
->outline_elements
=
345 resize(doc
->outline_elements
, osize
);
348 doc
->outline_elements
[doc
->n_outline_elements
].level
=
349 pdata
->outline_level
;
350 doc
->outline_elements
[doc
->n_outline_elements
].pdata
= pdata
;
351 doc
->n_outline_elements
++;
361 static para_data
*make_para_data(int ptype
, int paux
, int indent
,
362 word
*pkwtext
, word
*pkwtext2
, word
*pwords
,
367 int extra_indent
, firstline_indent
, aux_indent
;
370 pdata
= mknew(para_data
);
371 pdata
->outline_level
= -1;
372 pdata
->outline_title
= NULL
;
373 pdata
->rect_type
= RECT_NONE
;
376 * Choose fonts for this paragraph.
378 * FIXME: All of this ought to be completely
383 pdata
->fonts
[FONT_NORMAL
] = conf
->hr
;
384 pdata
->sizes
[FONT_NORMAL
] = 24;
385 pdata
->fonts
[FONT_EMPH
] = conf
->hi
;
386 pdata
->sizes
[FONT_EMPH
] = 24;
387 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
388 pdata
->sizes
[FONT_CODE
] = 24;
389 pdata
->outline_level
= 0;
394 case para_UnnumberedChapter
:
395 pdata
->fonts
[FONT_NORMAL
] = conf
->hr
;
396 pdata
->sizes
[FONT_NORMAL
] = 20;
397 pdata
->fonts
[FONT_EMPH
] = conf
->hi
;
398 pdata
->sizes
[FONT_EMPH
] = 20;
399 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
400 pdata
->sizes
[FONT_CODE
] = 20;
401 pdata
->outline_level
= 1;
406 pdata
->fonts
[FONT_NORMAL
] = conf
->hr
;
407 pdata
->fonts
[FONT_EMPH
] = conf
->hi
;
408 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
409 pdata
->sizes
[FONT_NORMAL
] =
410 pdata
->sizes
[FONT_EMPH
] =
411 pdata
->sizes
[FONT_CODE
] =
412 (paux
== 0 ?
16 : paux
== 1 ?
14 : 13);
413 pdata
->outline_level
= 2 + paux
;
417 case para_BiblioCited
:
419 case para_NumberedList
:
420 case para_DescribedThing
:
421 case para_Description
:
423 pdata
->fonts
[FONT_NORMAL
] = conf
->tr
;
424 pdata
->sizes
[FONT_NORMAL
] = 12;
425 pdata
->fonts
[FONT_EMPH
] = conf
->ti
;
426 pdata
->sizes
[FONT_EMPH
] = 12;
427 pdata
->fonts
[FONT_CODE
] = conf
->cr
;
428 pdata
->sizes
[FONT_CODE
] = 12;
433 * Also select an indentation level depending on the
434 * paragraph type (list paragraphs other than
435 * para_DescribedThing need extra indent).
437 * (FIXME: Perhaps at some point we might even arrange
438 * for the user to be able to request indented first
439 * lines in paragraphs.)
441 if (ptype
== para_Bullet
||
442 ptype
== para_NumberedList
||
443 ptype
== para_Description
) {
444 extra_indent
= firstline_indent
= conf
->indent_list
;
446 extra_indent
= firstline_indent
= 0;
450 * Find the auxiliary text for this paragraph.
461 * For some heading styles (FIXME: be able to
462 * configure which), the auxiliary text contains
463 * the chapter number and is arranged to be
464 * right-aligned a few points left of the primary
465 * margin. For other styles, the auxiliary text is
466 * the full chapter _name_ and takes up space
467 * within the (wrapped) chapter title, meaning that
468 * we must move the first line indent over to make
471 if (ptype
== para_Heading
|| ptype
== para_Subsect
) {
475 len
= paper_width_simple(pdata
, pkwtext2
);
476 aux_indent
= -len
- conf
->sect_num_left_space
;
478 pdata
->outline_title
=
479 prepare_outline_title(pkwtext2
, L
" ", pwords
);
485 aux2
->type
= word_Normal
;
486 aux2
->text
= ustrdup(L
": ");
487 aux2
->breaks
= FALSE
;
491 firstline_indent
+= paper_width_simple(pdata
, aux
);
492 firstline_indent
+= paper_width_simple(pdata
, aux2
);
494 pdata
->outline_title
=
495 prepare_outline_title(pkwtext
, L
": ", pwords
);
501 * Auxiliary text consisting of a bullet. (FIXME:
502 * configurable bullet.)
507 aux
->type
= word_Normal
;
508 aux
->text
= ustrdup(L
"\x2022");
511 aux_indent
= indent
+ conf
->indent_list_bullet
;
514 case para_NumberedList
:
516 * Auxiliary text consisting of the number followed
517 * by a (FIXME: configurable) full stop.
523 aux2
->type
= word_Normal
;
524 aux2
->text
= ustrdup(L
".");
525 aux2
->breaks
= FALSE
;
527 aux_indent
= indent
+ conf
->indent_list_bullet
;
530 case para_BiblioCited
:
532 * Auxiliary text consisting of the bibliography
533 * reference text, and a trailing space.
539 aux2
->type
= word_Normal
;
540 aux2
->text
= ustrdup(L
" ");
541 aux2
->breaks
= FALSE
;
544 firstline_indent
+= paper_width_simple(pdata
, aux
);
545 firstline_indent
+= paper_width_simple(pdata
, aux2
);
549 if (pdata
->outline_level
>= 0 && !pdata
->outline_title
) {
550 pdata
->outline_title
=
551 prepare_outline_title(NULL
, NULL
, pwords
);
554 wrap_paragraph(pdata
, pwords
, conf
->base_width
,
555 indent
+ firstline_indent
,
556 indent
+ extra_indent
);
558 pdata
->first
->aux_text
= aux
;
559 pdata
->first
->aux_text_2
= aux2
;
560 pdata
->first
->aux_left_indent
= aux_indent
;
563 * Line breaking penalties.
570 case para_UnnumberedChapter
:
572 * Fixed and large penalty for breaking straight
573 * after a heading; corresponding bonus for
574 * breaking straight before.
576 pdata
->first
->penalty_before
= -500000;
577 pdata
->last
->penalty_after
= 500000;
578 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
)
579 ldata
->penalty_after
= 500000;
582 case para_DescribedThing
:
584 * This is treated a bit like a small heading:
585 * there's a penalty for breaking after it (i.e.
586 * between it and its description), and a bonus for
587 * breaking before it (actually _between_ list
590 pdata
->first
->penalty_before
= -200000;
591 pdata
->last
->penalty_after
= 200000;
596 * Most paragraph types: widow/orphan control by
597 * discouraging breaking one line from the end of
600 if (pdata
->first
!= pdata
->last
) {
601 pdata
->first
->penalty_after
= 100000;
602 pdata
->last
->penalty_before
= 100000;
607 standard_line_spacing(pdata
, conf
);
610 * Some kinds of section heading require a page break before
611 * them and an underline after.
613 if (ptype
== para_Title
||
614 ptype
== para_Chapter
||
615 ptype
== para_Appendix
||
616 ptype
== para_UnnumberedChapter
) {
617 pdata
->first
->page_break
= TRUE
;
618 pdata
->first
->space_before
= conf
->chapter_top_space
;
619 pdata
->last
->space_after
+=
620 (conf
->chapter_underline_depth
+
621 conf
->chapter_underline_thickness
);
622 pdata
->rect_type
= RECT_CHAPTER_UNDERLINE
;
628 static void standard_line_spacing(para_data
*pdata
, paper_conf
*conf
)
633 * Set the line spacing for each line in this paragraph.
635 for (ldata
= pdata
->first
; ldata
; ldata
= ldata
->next
) {
636 if (ldata
== pdata
->first
)
637 ldata
->space_before
= conf
->base_para_spacing
/ 2;
639 ldata
->space_before
= conf
->base_leading
/ 2;
640 if (ldata
== pdata
->last
)
641 ldata
->space_after
= conf
->base_para_spacing
/ 2;
643 ldata
->space_after
= conf
->base_leading
/ 2;
644 ldata
->page_break
= FALSE
;
648 static font_encoding
*new_font_encoding(font_data
*font
)
653 fe
= mknew(font_encoding
);
656 if (font
->list
->tail
)
657 font
->list
->tail
->next
= fe
;
659 font
->list
->head
= fe
;
660 font
->list
->tail
= fe
;
665 for (i
= 0; i
< 256; i
++) {
666 fe
->vector
[i
] = NULL
;
668 fe
->to_unicode
[i
] = 0xFFFF;
674 static font_data
*make_std_font(font_list
*fontlist
, char const *name
)
682 widths
= ps_std_font_widths(name
);
686 for (nglyphs
= 0; ps_std_glyphs
[nglyphs
] != NULL
; nglyphs
++);
688 f
= mknew(font_data
);
692 f
->nglyphs
= nglyphs
;
693 f
->glyphs
= ps_std_glyphs
;
695 f
->subfont_map
= mknewa(subfont_map_entry
, nglyphs
);
698 * Our first subfont will contain all of US-ASCII. This isn't
699 * really necessary - we could just create custom subfonts
700 * precisely as the whim of render_string dictated - but
701 * instinct suggests that it might be nice to have the text in
702 * the output files look _marginally_ recognisable.
704 fe
= new_font_encoding(f
);
705 fe
->free_pos
= 0xA1; /* only the top half is free */
706 f
->latest_subfont
= fe
;
708 for (i
= 0; i
< (int)lenof(f
->bmp
); i
++)
711 for (i
= 0; i
< nglyphs
; i
++) {
713 ucs
= ps_glyph_to_unicode(f
->glyphs
[i
]);
714 assert(ucs
!= 0xFFFF);
716 if (ucs
>= 0x20 && ucs
<= 0x7E) {
717 fe
->vector
[ucs
] = f
->glyphs
[i
];
718 fe
->indices
[ucs
] = i
;
719 fe
->to_unicode
[ucs
] = ucs
;
720 f
->subfont_map
[i
].subfont
= fe
;
721 f
->subfont_map
[i
].position
= ucs
;
724 * This character is not yet assigned to a subfont.
726 f
->subfont_map
[i
].subfont
= NULL
;
727 f
->subfont_map
[i
].position
= 0;
734 static int string_width(font_data
*font
, wchar_t const *string
, int *errs
)
741 for (; *string
; string
++) {
744 index
= font
->bmp
[(unsigned short)*string
];
745 if (index
== 0xFFFF) {
749 width
+= font
->widths
[index
];
756 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
);
758 struct paper_width_ctx
{
763 static int paper_width_list(void *vctx
, word
*text
, word
*end
, int *nspaces
) {
765 while (text
&& text
!= end
) {
766 w
+= paper_width_internal(vctx
, text
, nspaces
);
772 static int paper_width_internal(void *vctx
, word
*word
, int *nspaces
)
774 struct paper_width_ctx
*ctx
= (struct paper_width_ctx
*)vctx
;
775 int style
, type
, findex
, width
, errs
;
778 switch (word
->type
) {
788 style
= towordstyle(word
->type
);
789 type
= removeattr(word
->type
);
791 findex
= (style
== word_Normal ? FONT_NORMAL
:
792 style
== word_Emph ? FONT_EMPH
:
795 if (type
== word_Normal
) {
797 } else if (type
== word_WhiteSpace
) {
798 if (findex
!= FONT_CODE
) {
801 return ctx
->minspacewidth
;
804 } else /* if (type == word_Quote) */ {
805 if (word
->aux
== quote_Open
)
806 str
= L
"\x2018"; /* FIXME: configurability! */
808 str
= L
"\x2019"; /* FIXME: configurability! */
811 width
= string_width(ctx
->pdata
->fonts
[findex
], str
, &errs
);
813 if (errs
&& word
->alt
)
814 return paper_width_list(vctx
, word
->alt
, NULL
, nspaces
);
816 return ctx
->pdata
->sizes
[findex
] * width
;
819 static int paper_width(void *vctx
, word
*word
)
821 return paper_width_internal(vctx
, word
, NULL
);
824 static int paper_width_simple(para_data
*pdata
, word
*text
)
826 struct paper_width_ctx ctx
;
830 (pdata
->sizes
[FONT_NORMAL
] *
831 string_width(pdata
->fonts
[FONT_NORMAL
], L
" ", NULL
));
833 return paper_width_list(&ctx
, text
, NULL
, NULL
);
836 static void wrap_paragraph(para_data
*pdata
, word
*words
,
837 int w
, int i1
, int i2
)
839 wrappedline
*wrapping
, *p
;
841 struct paper_width_ctx ctx
;
845 * We're going to need to store the line height in every line
846 * structure we generate.
851 for (i
= 0; i
< NFONTS
; i
++)
852 if (line_height
< pdata
->sizes
[i
])
853 line_height
= pdata
->sizes
[i
];
857 spacewidth
= (pdata
->sizes
[FONT_NORMAL
] *
858 string_width(pdata
->fonts
[FONT_NORMAL
], L
" ", NULL
));
859 if (spacewidth
== 0) {
861 * A font without a space?! Disturbing. I hope this never
862 * comes up, but I'll make a random guess anyway and set my
863 * space width to half the point size.
865 spacewidth
= pdata
->sizes
[FONT_NORMAL
] * 4096 / 2;
869 * I'm going to set the _minimum_ space width to 3/5 of the
870 * standard one, and use the standard one as the optimum.
872 ctx
.minspacewidth
= spacewidth
* 3 / 5;
875 wrapping
= wrap_para(words
, w
- i1
, w
- i2
, paper_width
, &ctx
, spacewidth
);
878 * Having done the wrapping, we now concoct a set of line_data
881 pdata
->first
= pdata
->last
= NULL
;
883 for (p
= wrapping
; p
; p
= p
->next
) {
886 int len
, wid
, spaces
;
888 ldata
= mknew(line_data
);
890 ldata
->pdata
= pdata
;
891 ldata
->first
= p
->begin
;
893 ldata
->line_height
= line_height
;
895 ldata
->xpos
= (p
== wrapping ? i1
: i2
);
898 pdata
->last
->next
= ldata
;
899 ldata
->prev
= pdata
->last
;
901 pdata
->first
= ldata
;
908 len
= paper_width_list(&ctx
, ldata
->first
, ldata
->end
, &spaces
);
909 wid
= (p
== wrapping ? w
- i1
: w
- i2
);
912 ldata
->hshortfall
= wid
- len
;
913 ldata
->nspaces
= spaces
;
915 * This tells us how much the space width needs to
916 * change from _min_spacewidth. But we want to store
917 * its difference from the _natural_ space width, to
918 * make the text rendering easier.
920 ldata
->hshortfall
+= ctx
.minspacewidth
* spaces
;
921 ldata
->hshortfall
-= spacewidth
* spaces
;
923 * Special case: on the last line of a paragraph, we
924 * never stretch spaces.
926 if (ldata
->hshortfall
> 0 && !p
->next
)
927 ldata
->hshortfall
= 0;
929 ldata
->aux_text
= NULL
;
930 ldata
->aux_text_2
= NULL
;
931 ldata
->aux_left_indent
= 0;
932 ldata
->penalty_before
= ldata
->penalty_after
= 0;
937 static page_data
*page_breaks(line_data
*first
, line_data
*last
,
944 * Page breaking is done by a close analogue of the optimal
945 * paragraph wrapping algorithm used by wrap_para(). We work
946 * backwards from the end of the document line by line; for
947 * each line, we contemplate every possible number of lines we
948 * could put on a page starting with that line, determine a
949 * cost function for each one, add it to the pre-computed cost
950 * function for optimally page-breaking everything after that
951 * page, and pick the best option.
953 * Since my line_data structures are only used for this
954 * purpose, I might as well just store the algorithm data
958 for (l
= last
; l
; l
= l
->prev
) {
959 int minheight
, text
= 0, space
= 0;
963 for (m
= l
; m
; m
= m
->next
) {
964 if (m
!= l
&& m
->page_break
)
965 break; /* we've gone as far as we can */
968 space
+= m
->prev
->space_after
;
969 if (m
!= l
|| m
->page_break
)
970 space
+= m
->space_before
;
971 text
+= m
->line_height
;
972 minheight
= text
+ space
;
974 if (m
!= l
&& minheight
> page_height
)
978 * Compute the cost of this arrangement, as the square
979 * of the amount of wasted space on the page.
980 * Exception: if this is the last page before a
981 * mandatory break or the document end, we don't
982 * penalise a large blank area.
984 if (m
->next
&& !m
->next
->page_break
)
986 int x
= page_height
- minheight
;
993 cost
+= (x
* xf
) >> 8;
997 if (m
->next
&& !m
->next
->page_break
) {
998 cost
+= m
->penalty_after
;
999 cost
+= m
->next
->penalty_before
;
1002 if (m
->next
&& !m
->next
->page_break
)
1003 cost
+= m
->next
->bestcost
;
1004 if (l
->bestcost
== -1 || l
->bestcost
> cost
) {
1006 * This is the best option yet for this starting
1010 if (m
->next
&& !m
->next
->page_break
)
1011 l
->vshortfall
= page_height
- minheight
;
1022 * Now go through the line list forwards and assemble the
1032 page
= mknew(page_data
);
1041 page
->first_line
= l
;
1042 page
->last_line
= l
->page_last
;
1044 page
->first_text
= page
->last_text
= NULL
;
1045 page
->first_xref
= page
->last_xref
= NULL
;
1046 page
->first_rect
= page
->last_rect
= NULL
;
1049 * Now assign a y-coordinate to each line on the page.
1052 for (l
= page
->first_line
; l
; l
= l
->next
) {
1053 if (l
!= page
->first_line
)
1054 space
+= l
->prev
->space_after
;
1055 if (l
!= page
->first_line
|| l
->page_break
)
1056 space
+= l
->space_before
;
1057 text
+= l
->line_height
;
1060 l
->ypos
= text
+ space
+
1061 space
* (float)page
->first_line
->vshortfall
/
1062 page
->first_line
->space
;
1064 if (l
== page
->last_line
)
1068 l
= page
->last_line
->next
;
1074 static void add_rect_to_page(page_data
*page
, int x
, int y
, int w
, int h
)
1076 rect
*r
= mknew(rect
);
1079 if (page
->last_rect
)
1080 page
->last_rect
->next
= r
;
1082 page
->first_rect
= r
;
1083 page
->last_rect
= r
;
1091 static void add_string_to_page(page_data
*page
, int x
, int y
,
1092 font_encoding
*fe
, int size
, char *text
)
1094 text_fragment
*frag
;
1096 frag
= mknew(text_fragment
);
1099 if (page
->last_text
)
1100 page
->last_text
->next
= frag
;
1102 page
->first_text
= frag
;
1103 page
->last_text
= frag
;
1108 frag
->fontsize
= size
;
1109 frag
->text
= dupstr(text
);
1113 * Returns the updated x coordinate.
1115 static int render_string(page_data
*page
, font_data
*font
, int fontsize
,
1116 int x
, int y
, wchar_t *str
)
1119 int textpos
, textwid
, glyph
;
1120 font_encoding
*subfont
= NULL
, *sf
;
1122 text
= mknewa(char, 1 + ustrlen(str
));
1123 textpos
= textwid
= 0;
1126 glyph
= font
->bmp
[*str
];
1128 if (glyph
== 0xFFFF)
1129 continue; /* nothing more we can do here */
1132 * Find which subfont this character is going in.
1134 sf
= font
->subfont_map
[glyph
].subfont
;
1140 * This character is not yet in a subfont. Assign one.
1142 if (font
->latest_subfont
->free_pos
>= 0x100)
1143 font
->latest_subfont
= new_font_encoding(font
);
1145 c
= font
->latest_subfont
->free_pos
++;
1146 if (font
->latest_subfont
->free_pos
== 0x7F)
1147 font
->latest_subfont
->free_pos
= 0xA1;
1149 font
->subfont_map
[glyph
].subfont
= font
->latest_subfont
;
1150 font
->subfont_map
[glyph
].position
= c
;
1151 font
->latest_subfont
->vector
[c
] = font
->glyphs
[glyph
];
1152 font
->latest_subfont
->indices
[c
] = glyph
;
1153 font
->latest_subfont
->to_unicode
[c
] = *str
;
1155 sf
= font
->latest_subfont
;
1158 if (!subfont
|| sf
!= subfont
) {
1160 text
[textpos
] = '\0';
1161 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
1164 assert(textpos
== 0);
1170 text
[textpos
++] = font
->subfont_map
[glyph
].position
;
1171 textwid
+= font
->widths
[glyph
] * fontsize
;
1177 text
[textpos
] = '\0';
1178 add_string_to_page(page
, x
, y
, subfont
, fontsize
, text
);
1186 * Returns the updated x coordinate.
1188 static int render_text(page_data
*page
, para_data
*pdata
, line_data
*ldata
,
1189 int x
, int y
, word
*text
, word
*text_end
, xref
**xr
,
1190 int shortfall
, int nspaces
, int *nspace
,
1191 keywordlist
*keywords
)
1193 while (text
&& text
!= text_end
) {
1194 int style
, type
, findex
, errs
;
1198 switch (text
->type
) {
1200 * Start a cross-reference.
1202 case word_HyperLink
:
1203 case word_UpperXref
:
1204 case word_LowerXref
:
1206 if (text
->type
== word_HyperLink
) {
1208 dest
.url
= utoa_dup(text
->text
);
1211 keyword
*kwl
= kw_lookup(keywords
, text
->text
);
1215 assert(kwl
->para
->private_data
);
1216 pdata
= (para_data
*) kwl
->para
->private_data
;
1218 dest
.page
= pdata
->first
->page
;
1222 * Shouldn't happen, but *shrug*
1229 if (dest
.type
!= NONE
) {
1231 (*xr
)->dest
= dest
; /* structure copy */
1232 if (page
->last_xref
)
1233 page
->last_xref
->next
= *xr
;
1235 page
->first_xref
= *xr
;
1236 page
->last_xref
= *xr
;
1240 * FIXME: Ideally we should have, and use, some
1241 * vertical font metric information here so that
1242 * our cross-ref rectangle can take account of
1243 * descenders and the font's cap height. This will
1244 * do for the moment, but it isn't ideal.
1246 (*xr
)->lx
= (*xr
)->rx
= x
;
1248 (*xr
)->ty
= y
+ ldata
->line_height
;
1253 * Finish extending a cross-reference box.
1263 * FIXME: we should do something with this.
1267 style
= towordstyle(text
->type
);
1268 type
= removeattr(text
->type
);
1270 findex
= (style
== word_Normal ? FONT_NORMAL
:
1271 style
== word_Emph ? FONT_EMPH
:
1274 if (type
== word_Normal
) {
1276 } else if (type
== word_WhiteSpace
) {
1277 x
+= pdata
->sizes
[findex
] *
1278 string_width(pdata
->fonts
[findex
], L
" ", NULL
);
1279 if (nspaces
&& findex
!= FONT_CODE
) {
1280 x
+= (*nspace
+1) * shortfall
/ nspaces
;
1281 x
-= *nspace
* shortfall
/ nspaces
;
1285 } else /* if (type == word_Quote) */ {
1286 if (text
->aux
== quote_Open
)
1287 str
= L
"\x2018"; /* FIXME: configurability! */
1289 str
= L
"\x2019"; /* FIXME: configurability! */
1292 (void) string_width(pdata
->fonts
[findex
], str
, &errs
);
1294 if (errs
&& text
->alt
)
1295 x
= render_text(page
, pdata
, ldata
, x
, y
, text
->alt
, NULL
,
1296 xr
, shortfall
, nspaces
, nspace
, keywords
);
1298 x
= render_string(page
, pdata
->fonts
[findex
],
1299 pdata
->sizes
[findex
], x
, y
, str
);
1311 static void render_line(line_data
*ldata
, int left_x
, int top_y
,
1312 xref_dest
*dest
, keywordlist
*keywords
)
1317 if (ldata
->aux_text
) {
1321 x
= render_text(ldata
->page
, ldata
->pdata
, ldata
,
1322 left_x
+ ldata
->aux_left_indent
,
1323 top_y
- ldata
->ypos
,
1324 ldata
->aux_text
, NULL
, &xr
, 0, 0, &nspace
, keywords
);
1325 if (ldata
->aux_text_2
)
1326 render_text(ldata
->page
, ldata
->pdata
, ldata
,
1327 x
, top_y
- ldata
->ypos
,
1328 ldata
->aux_text_2
, NULL
, &xr
, 0, 0, &nspace
, keywords
);
1334 * There might be a cross-reference carried over from a
1337 if (dest
->type
!= NONE
) {
1340 xr
->dest
= *dest
; /* structure copy */
1341 if (ldata
->page
->last_xref
)
1342 ldata
->page
->last_xref
->next
= xr
;
1344 ldata
->page
->first_xref
= xr
;
1345 ldata
->page
->last_xref
= xr
;
1346 xr
->lx
= xr
->rx
= left_x
+ ldata
->xpos
;
1347 xr
->by
= top_y
- ldata
->ypos
;
1348 xr
->ty
= top_y
- ldata
->ypos
+ ldata
->line_height
;
1352 render_text(ldata
->page
, ldata
->pdata
, ldata
, left_x
+ ldata
->xpos
,
1353 top_y
- ldata
->ypos
, ldata
->first
, ldata
->end
, &xr
,
1354 ldata
->hshortfall
, ldata
->nspaces
, &nspace
, keywords
);
1358 * There's a cross-reference continued on to the next line.
1366 static para_data
*code_paragraph(int indent
, word
*words
, paper_conf
*conf
)
1368 para_data
*pdata
= mknew(para_data
);
1371 * For code paragraphs, I'm going to hack grievously and
1372 * pretend the three normal fonts are the three code paragraph
1375 pdata
->fonts
[FONT_NORMAL
] = conf
->cb
;
1376 pdata
->fonts
[FONT_EMPH
] = conf
->co
;
1377 pdata
->fonts
[FONT_CODE
] = conf
->cb
;
1378 pdata
->sizes
[FONT_NORMAL
] =
1379 pdata
->sizes
[FONT_EMPH
] =
1380 pdata
->sizes
[FONT_CODE
] = 12;
1382 pdata
->first
= pdata
->last
= NULL
;
1383 pdata
->outline_level
= -1;
1384 pdata
->rect_type
= RECT_NONE
;
1386 for (; words
; words
= words
->next
) {
1387 wchar_t *t
, *e
, *start
;
1388 word
*lhead
= NULL
, *ltail
= NULL
, *w
;
1390 int prev
= -1, curr
;
1393 if (words
->next
&& words
->next
->type
== word_Emph
) {
1394 e
= words
->next
->text
;
1395 words
= words
->next
;
1405 else if (*e
== L
'i')
1407 else if (*e
== L
'b')
1424 * We've isolated a maximal subsequence of the line
1425 * which has the same emphasis. Form it into a word
1431 w
->type
= (prev
== 0 ? word_WeakCode
:
1432 prev
== 1 ? word_Emph
: word_Normal
);
1433 w
->text
= mknewa(wchar_t, t
-start
+1);
1434 memcpy(w
->text
, start
, (t
-start
) * sizeof(wchar_t));
1435 w
->text
[t
-start
] = '\0';
1448 ldata
= mknew(line_data
);
1450 ldata
->pdata
= pdata
;
1451 ldata
->first
= lhead
;
1453 ldata
->line_height
= conf
->base_font_size
* 4096;
1455 ldata
->xpos
= indent
;
1458 pdata
->last
->next
= ldata
;
1459 ldata
->prev
= pdata
->last
;
1461 pdata
->first
= ldata
;
1465 pdata
->last
= ldata
;
1467 ldata
->hshortfall
= 0;
1469 ldata
->aux_text
= NULL
;
1470 ldata
->aux_text_2
= NULL
;
1471 ldata
->aux_left_indent
= 0;
1472 /* General opprobrium for breaking in a code paragraph. */
1473 ldata
->penalty_before
= ldata
->penalty_after
= 50000;
1476 standard_line_spacing(pdata
, conf
);
1481 static para_data
*rule_paragraph(int indent
, paper_conf
*conf
)
1483 para_data
*pdata
= mknew(para_data
);
1486 ldata
= mknew(line_data
);
1488 ldata
->pdata
= pdata
;
1489 ldata
->first
= NULL
;
1491 ldata
->line_height
= conf
->rule_thickness
;
1493 ldata
->xpos
= indent
;
1498 ldata
->hshortfall
= 0;
1500 ldata
->aux_text
= NULL
;
1501 ldata
->aux_text_2
= NULL
;
1502 ldata
->aux_left_indent
= 0;
1505 * Better to break after a rule than before it
1507 ldata
->penalty_after
+= 100000;
1508 ldata
->penalty_before
+= -100000;
1510 pdata
->first
= pdata
->last
= ldata
;
1511 pdata
->outline_level
= -1;
1512 pdata
->rect_type
= RECT_RULE
;
1514 standard_line_spacing(pdata
, conf
);
1520 * Plain-text-like formatting for outline titles.
1522 static void paper_rdaddw(rdstring
*rs
, word
*text
) {
1523 for (; text
; text
= text
->next
) switch (text
->type
) {
1524 case word_HyperLink
:
1526 case word_UpperXref
:
1527 case word_LowerXref
:
1536 case word_WhiteSpace
:
1537 case word_EmphSpace
:
1538 case word_CodeSpace
:
1539 case word_WkCodeSpace
:
1541 case word_EmphQuote
:
1542 case word_CodeQuote
:
1543 case word_WkCodeQuote
:
1544 assert(text
->type
!= word_CodeQuote
&&
1545 text
->type
!= word_WkCodeQuote
);
1546 if (towordstyle(text
->type
) == word_Emph
&&
1547 (attraux(text
->aux
) == attr_First
||
1548 attraux(text
->aux
) == attr_Only
))
1549 rdadd(rs
, L
'_'); /* FIXME: configurability */
1550 else if (towordstyle(text
->type
) == word_Code
&&
1551 (attraux(text
->aux
) == attr_First
||
1552 attraux(text
->aux
) == attr_Only
))
1553 rdadd(rs
, L
'\''); /* FIXME: configurability */
1554 if (removeattr(text
->type
) == word_Normal
) {
1555 rdadds(rs
, text
->text
);
1556 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
1558 } else if (removeattr(text
->type
) == word_Quote
) {
1559 rdadd(rs
, L
'\''); /* fixme: configurability */
1561 if (towordstyle(text
->type
) == word_Emph
&&
1562 (attraux(text
->aux
) == attr_Last
||
1563 attraux(text
->aux
) == attr_Only
))
1564 rdadd(rs
, L
'_'); /* FIXME: configurability */
1565 else if (towordstyle(text
->type
) == word_Code
&&
1566 (attraux(text
->aux
) == attr_Last
||
1567 attraux(text
->aux
) == attr_Only
))
1568 rdadd(rs
, L
'\''); /* FIXME: configurability */
1573 static wchar_t *prepare_outline_title(word
*first
, wchar_t *separator
,
1576 rdstring rs
= {0, 0, NULL
};
1579 paper_rdaddw(&rs
, first
);
1581 rdadds(&rs
, separator
);
1583 paper_rdaddw(&rs
, second
);