Initial ligature support. This adds support for emitting ligatures, and adds
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * TODO in future work:
14 *
15 * - linearised PDF, perhaps?
16 *
17 * - I'm uncertain of whether I need to include a ToUnicode CMap
18 * in each of my font definitions in PDF. Currently things (by
19 * which I mean cut and paste out of acroread) seem to be
20 * working fairly happily without it, but I don't know.
21 *
22 * - rather than the ugly aux_text mechanism for rendering chapter
23 * titles, we could actually build the correct word list and
24 * wrap it as a whole.
25 *
26 * - get vertical font metrics and use them to position the PDF
27 * xref boxes more pleasantly
28 *
29 * - configurability
30 * * page header and footer should be configurable; we should
31 * be able to shift the page number elsewhere, and add other
32 * things such as the current chapter/section title and fixed
33 * text
34 * * remove the fixed mapping from heading levels to heading
35 * styles; offer a menu of styles from which the user can
36 * choose at every heading level
37 * * first-line indent in paragraphs
38 * * fixed text: `Contents', `Index', the colon-space and full
39 * stop in chapter title constructions
40 * * configurable location of contents?
41 * * certainly configurably _remove_ the contents, and possibly
42 * also the index
43 * * double-sided document switch?
44 * + means you have two header/footer formats which
45 * alternate
46 * + and means that mandatory page breaks before chapter
47 * titles should include a blank page if necessary to
48 * start the next section to a right-hand page
49 *
50 * - title pages
51 *
52 * - ability to import other Type 1 fonts
53 * * we need to parse the font to extract its metrics
54 * * then we pass the font bodily to both PS and PDF so it can
55 * be included in the output file
56 *
57 * - character substitution for better typography?
58 * * fi, fl, ffi, ffl ligatures
59 * * use real ellipsis rather than ...
60 * * a hyphen in a word by itself might prefer to be an en-dash
61 * * (Americans might even want a convenient way to use an
62 * em-dash)
63 * * DON'T DO ANY OF THE ABOVE WITHIN \c OR \cw!
64 * * substituting `minus' for `hyphen' in the standard encoding
65 * is probably preferable in Courier, though certainly not in
66 * the main text font
67 * * if I do do this lot, I'm rather inclined to at least try
68 * to think up a configurable way to do it so that Americans
69 * can do em-dash tricks without my intervention and other
70 * people can do other odd things too.
71 */
72
73 #include <assert.h>
74 #include <stdio.h>
75 #include <stdarg.h>
76 #include <stdlib.h>
77
78 #include "halibut.h"
79 #include "paper.h"
80
81 typedef struct paper_conf_Tag paper_conf;
82 typedef struct paper_idx_Tag paper_idx;
83
84 typedef struct {
85 font_data *fonts[NFONTS];
86 int font_size;
87 } font_cfg;
88
89 struct paper_conf_Tag {
90 int paper_width;
91 int paper_height;
92 int left_margin;
93 int top_margin;
94 int right_margin;
95 int bottom_margin;
96 int indent_list_bullet;
97 int indent_list_after;
98 int indent_list;
99 int indent_quote;
100 int base_leading;
101 int base_para_spacing;
102 int chapter_top_space;
103 int sect_num_left_space;
104 int chapter_underline_depth;
105 int chapter_underline_thickness;
106 int rule_thickness;
107 font_cfg fbase, fcode, ftitle, fchapter, *fsect;
108 int nfsect;
109 int contents_indent_step;
110 int contents_margin;
111 int leader_separation;
112 int index_gutter;
113 int index_cols;
114 int index_minsep;
115 int pagenum_fontsize;
116 int footer_distance;
117 wchar_t *lquote, *rquote, *bullet;
118 wchar_t *contents_text, *index_text;
119 /* These are derived from the above */
120 int base_width;
121 int page_height;
122 int index_colwidth;
123 };
124
125 struct paper_idx_Tag {
126 /*
127 * Word list giving the page numbers on which this index entry
128 * appears. Also the last word in the list, for ease of
129 * construction.
130 */
131 word *words;
132 word *lastword;
133 /*
134 * The last page added to the list (so we can ensure we don't
135 * add one twice).
136 */
137 page_data *lastpage;
138 };
139
140 enum {
141 word_PageXref = word_NotWordType + 1
142 };
143
144 /* Flags for render_string() */
145 #define RS_NOLIG 1
146
147 static font_data *make_std_font(font_list *fontlist, char const *name);
148 static void wrap_paragraph(para_data *pdata, word *words,
149 int w, int i1, int i2, paper_conf *conf);
150 static page_data *page_breaks(line_data *first, line_data *last,
151 int page_height, int ncols, int headspace);
152 static int render_string(page_data *page, font_data *font, int fontsize,
153 int x, int y, wchar_t *str, unsigned flags);
154 static int render_line(line_data *ldata, int left_x, int top_y,
155 xref_dest *dest, keywordlist *keywords, indexdata *idx,
156 paper_conf *conf);
157 static void render_para(para_data *pdata, paper_conf *conf,
158 keywordlist *keywords, indexdata *idx,
159 paragraph *index_placeholder, page_data *index_page);
160 static int string_width(font_data *font, wchar_t const *string, int *errs,
161 unsigned flags);
162 static int paper_width_simple(para_data *pdata, word *text, paper_conf *conf);
163 static para_data *code_paragraph(int indent, word *words, paper_conf *conf);
164 static para_data *rule_paragraph(int indent, paper_conf *conf);
165 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
166 static para_data *make_para_data(int ptype, int paux, int indent, int rmargin,
167 word *pkwtext, word *pkwtext2, word *pwords,
168 paper_conf *conf);
169 static void standard_line_spacing(para_data *pdata, paper_conf *conf);
170 static wchar_t *prepare_outline_title(word *first, wchar_t *separator,
171 word *second);
172 static word *fake_word(wchar_t *text);
173 static word *fake_space_word(void);
174 static word *fake_page_ref(page_data *page);
175 static word *fake_end_ref(void);
176 static word *prepare_contents_title(word *first, wchar_t *separator,
177 word *second);
178 static void fold_into_page(page_data *dest, page_data *src, int right_shift);
179
180 static int fonts_ok(wchar_t *string, ...)
181 {
182 font_data *font;
183 va_list ap;
184 int ret = TRUE;
185
186 va_start(ap, string);
187 while ( (font = va_arg(ap, font_data *)) != NULL) {
188 int errs;
189 (void) string_width(font, string, &errs, 0);
190 if (errs) {
191 ret = FALSE;
192 break;
193 }
194 }
195 va_end(ap);
196
197 return ret;
198 }
199
200 static void paper_cfg_fonts(font_data **fonts, font_list *fontlist,
201 wchar_t *wp, filepos *fpos) {
202 font_data *f;
203 char *fn;
204 int i;
205
206 for (i = 0; i < NFONTS && *wp; i++, wp = uadv(wp)) {
207 fn = utoa_dup(wp, CS_ASCII);
208 f = make_std_font(fontlist, fn);
209 if (f)
210 fonts[i] = f;
211 else
212 /* FIXME: proper error */
213 error(err_nofont, fpos, wp);
214 }
215 }
216
217 static paper_conf paper_configure(paragraph *source, font_list *fontlist) {
218 paragraph *p;
219 paper_conf ret;
220
221 /*
222 * Defaults.
223 */
224 ret.paper_width = 595 * UNITS_PER_PT;
225 ret.paper_height = 842 * UNITS_PER_PT;
226 ret.left_margin = 72 * UNITS_PER_PT;
227 ret.top_margin = 72 * UNITS_PER_PT;
228 ret.right_margin = 72 * UNITS_PER_PT;
229 ret.bottom_margin = 108 * UNITS_PER_PT;
230 ret.indent_list_bullet = 6 * UNITS_PER_PT;
231 ret.indent_list_after = 18 * UNITS_PER_PT;
232 ret.indent_quote = 18 * UNITS_PER_PT;
233 ret.base_leading = UNITS_PER_PT;
234 ret.base_para_spacing = 10 * UNITS_PER_PT;
235 ret.chapter_top_space = 72 * UNITS_PER_PT;
236 ret.sect_num_left_space = 12 * UNITS_PER_PT;
237 ret.chapter_underline_depth = 14 * UNITS_PER_PT;
238 ret.chapter_underline_thickness = 3 * UNITS_PER_PT;
239 ret.rule_thickness = 1 * UNITS_PER_PT;
240 ret.fbase.font_size = 12;
241 ret.fbase.fonts[FONT_NORMAL] = make_std_font(fontlist, "Times-Roman");
242 ret.fbase.fonts[FONT_EMPH] = make_std_font(fontlist, "Times-Italic");
243 ret.fbase.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
244 ret.fcode.font_size = 12;
245 ret.fcode.fonts[FONT_NORMAL] = make_std_font(fontlist, "Courier-Bold");
246 ret.fcode.fonts[FONT_EMPH] = make_std_font(fontlist, "Courier-Oblique");
247 ret.fcode.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
248 ret.ftitle.font_size = 24;
249 ret.ftitle.fonts[FONT_NORMAL] = make_std_font(fontlist, "Helvetica-Bold");
250 ret.ftitle.fonts[FONT_EMPH] =
251 make_std_font(fontlist, "Helvetica-BoldOblique");
252 ret.ftitle.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
253 ret.fchapter.font_size = 20;
254 ret.fchapter.fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
255 ret.fchapter.fonts[FONT_EMPH] =
256 make_std_font(fontlist, "Helvetica-BoldOblique");
257 ret.fchapter.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
258 ret.nfsect = 3;
259 ret.fsect = snewn(ret.nfsect, font_cfg);
260 ret.fsect[0].font_size = 16;
261 ret.fsect[0].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
262 ret.fsect[0].fonts[FONT_EMPH] =
263 make_std_font(fontlist, "Helvetica-BoldOblique");
264 ret.fsect[0].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
265 ret.fsect[1].font_size = 14;
266 ret.fsect[1].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
267 ret.fsect[1].fonts[FONT_EMPH] =
268 make_std_font(fontlist, "Helvetica-BoldOblique");
269 ret.fsect[1].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
270 ret.fsect[2].font_size = 13;
271 ret.fsect[2].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
272 ret.fsect[2].fonts[FONT_EMPH] =
273 make_std_font(fontlist, "Helvetica-BoldOblique");
274 ret.fsect[2].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
275 ret.contents_indent_step = 24 * UNITS_PER_PT;
276 ret.contents_margin = 84 * UNITS_PER_PT;
277 ret.leader_separation = 12 * UNITS_PER_PT;
278 ret.index_gutter = 36 * UNITS_PER_PT;
279 ret.index_cols = 2;
280 ret.index_minsep = 18 * UNITS_PER_PT;
281 ret.pagenum_fontsize = 12;
282 ret.footer_distance = 32 * UNITS_PER_PT;
283 ret.lquote = L"\x2018\0\x2019\0'\0'\0\0";
284 ret.rquote = uadv(ret.lquote);
285 ret.bullet = L"\x2022\0-\0\0";
286 ret.contents_text = L"Contents";
287 ret.index_text = L"Index";
288
289 /*
290 * Two-pass configuration so that we can pick up global config
291 * (e.g. `quotes') before having it overridden by specific
292 * config (`paper-quotes'), irrespective of the order in which
293 * they occur.
294 */
295 for (p = source; p; p = p->next) {
296 if (p->type == para_Config) {
297 if (!ustricmp(p->keyword, L"quotes")) {
298 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
299 ret.lquote = uadv(p->keyword);
300 ret.rquote = uadv(ret.lquote);
301 }
302 }
303 }
304 }
305
306 for (p = source; p; p = p->next) {
307 p->private_data = NULL;
308 if (p->type == para_Config) {
309 if (!ustricmp(p->keyword, L"paper-quotes")) {
310 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
311 ret.lquote = uadv(p->keyword);
312 ret.rquote = uadv(ret.lquote);
313 }
314 } else if (!ustricmp(p->keyword, L"contents")) {
315 ret.contents_text = uadv(p->keyword);
316 } else if (!ustricmp(p->keyword, L"index")) {
317 ret.index_text = uadv(p->keyword);
318 } else if (!ustricmp(p->keyword, L"paper-bullet")) {
319 ret.bullet = uadv(p->keyword);
320 } else if (!ustricmp(p->keyword, L"paper-page-width")) {
321 ret.paper_width =
322 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
323 } else if (!ustricmp(p->keyword, L"paper-page-height")) {
324 ret.paper_height =
325 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
326 } else if (!ustricmp(p->keyword, L"paper-left-margin")) {
327 ret.left_margin =
328 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
329 } else if (!ustricmp(p->keyword, L"paper-top-margin")) {
330 ret.top_margin =
331 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
332 } else if (!ustricmp(p->keyword, L"paper-right-margin")) {
333 ret.right_margin =
334 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
335 } else if (!ustricmp(p->keyword, L"paper-bottom-margin")) {
336 ret.bottom_margin =
337 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
338 } else if (!ustricmp(p->keyword, L"paper-list-indent")) {
339 ret.indent_list_bullet =
340 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
341 } else if (!ustricmp(p->keyword, L"paper-listitem-indent")) {
342 ret.indent_list =
343 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
344 } else if (!ustricmp(p->keyword, L"paper-quote-indent")) {
345 ret.indent_quote =
346 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
347 } else if (!ustricmp(p->keyword, L"paper-base-leading")) {
348 ret.base_leading =
349 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
350 } else if (!ustricmp(p->keyword, L"paper-base-para-spacing")) {
351 ret.base_para_spacing =
352 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
353 } else if (!ustricmp(p->keyword, L"paper-chapter-top-space")) {
354 ret.chapter_top_space =
355 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
356 } else if (!ustricmp(p->keyword, L"paper-sect-num-left-space")) {
357 ret.sect_num_left_space =
358 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
359 } else if (!ustricmp(p->keyword, L"paper-chapter-underline-depth")) {
360 ret.chapter_underline_depth =
361 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
362 } else if (!ustricmp(p->keyword, L"paper-chapter-underline-thickness")) {
363 ret.chapter_underline_thickness =
364 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
365 } else if (!ustricmp(p->keyword, L"paper-rule-thickness")) {
366 ret.rule_thickness =
367 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
368 } else if (!ustricmp(p->keyword, L"paper-contents-indent-step")) {
369 ret.contents_indent_step =
370 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
371 } else if (!ustricmp(p->keyword, L"paper-contents-margin")) {
372 ret.contents_margin =
373 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
374 } else if (!ustricmp(p->keyword, L"paper-leader-separation")) {
375 ret.leader_separation =
376 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
377 } else if (!ustricmp(p->keyword, L"paper-index-gutter")) {
378 ret.index_gutter =
379 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
380 } else if (!ustricmp(p->keyword, L"paper-index-minsep")) {
381 ret.index_minsep =
382 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
383 } else if (!ustricmp(p->keyword, L"paper-footer-distance")) {
384 ret.footer_distance =
385 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
386 } else if (!ustricmp(p->keyword, L"paper-base-font-size")) {
387 ret.fbase.font_size = utoi(uadv(p->keyword));
388 } else if (!ustricmp(p->keyword, L"paper-index-columns")) {
389 ret.index_cols = utoi(uadv(p->keyword));
390 } else if (!ustricmp(p->keyword, L"paper-pagenum-font-size")) {
391 ret.pagenum_fontsize = utoi(uadv(p->keyword));
392 } else if (!ustricmp(p->keyword, L"paper-base-fonts")) {
393 paper_cfg_fonts(ret.fbase.fonts, fontlist, uadv(p->keyword),
394 &p->fpos);
395 } else if (!ustricmp(p->keyword, L"paper-code-font-size")) {
396 ret.fcode.font_size = utoi(uadv(p->keyword));
397 } else if (!ustricmp(p->keyword, L"paper-code-fonts")) {
398 paper_cfg_fonts(ret.fcode.fonts, fontlist, uadv(p->keyword),
399 &p->fpos);
400 } else if (!ustricmp(p->keyword, L"paper-title-font-size")) {
401 ret.ftitle.font_size = utoi(uadv(p->keyword));
402 } else if (!ustricmp(p->keyword, L"paper-title-fonts")) {
403 paper_cfg_fonts(ret.ftitle.fonts, fontlist, uadv(p->keyword),
404 &p->fpos);
405 } else if (!ustricmp(p->keyword, L"paper-chapter-font-size")) {
406 ret.fchapter.font_size = utoi(uadv(p->keyword));
407 } else if (!ustricmp(p->keyword, L"paper-chapter-fonts")) {
408 paper_cfg_fonts(ret.fchapter.fonts, fontlist, uadv(p->keyword),
409 &p->fpos);
410 } else if (!ustricmp(p->keyword, L"paper-section-font-size")) {
411 wchar_t *q = uadv(p->keyword);
412 int n = 0;
413 if (uisdigit(*q)) {
414 n = utoi(q);
415 q = uadv(q);
416 }
417 if (n >= ret.nfsect) {
418 int i;
419 ret.fsect = sresize(ret.fsect, n+1, font_cfg);
420 for (i = ret.nfsect; i <= n; i++)
421 ret.fsect[i] = ret.fsect[ret.nfsect-1];
422 ret.nfsect = n+1;
423 }
424 ret.fsect[n].font_size = utoi(q);
425 } else if (!ustricmp(p->keyword, L"paper-section-fonts")) {
426 wchar_t *q = uadv(p->keyword);
427 int n = 0;
428 if (uisdigit(*q)) {
429 n = utoi(q);
430 q = uadv(q);
431 }
432 if (n >= ret.nfsect) {
433 int i;
434 ret.fsect = sresize(ret.fsect, n+1, font_cfg);
435 for (i = ret.nfsect; i <= n; i++)
436 ret.fsect[i] = ret.fsect[ret.nfsect-1];
437 ret.nfsect = n+1;
438 }
439 paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos);
440 }
441 }
442 }
443
444 /*
445 * Set up the derived fields in the conf structure.
446 */
447
448 ret.base_width =
449 ret.paper_width - ret.left_margin - ret.right_margin;
450 ret.page_height =
451 ret.paper_height - ret.top_margin - ret.bottom_margin;
452 ret.indent_list = ret.indent_list_bullet + ret.indent_list_after;
453 ret.index_colwidth =
454 (ret.base_width - (ret.index_cols-1) * ret.index_gutter)
455 / ret.index_cols;
456
457 /*
458 * Now process fallbacks on quote characters and bullets. We
459 * use string_width() to determine whether all of the relevant
460 * fonts contain the same character, and fall back whenever we
461 * find a character which not all of them support.
462 */
463
464 /* Quote characters need not be supported in the fixed code fonts,
465 * but must be in the title and body fonts. */
466 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote))) {
467 int n;
468 if (fonts_ok(ret.lquote,
469 ret.fbase.fonts[FONT_NORMAL],
470 ret.fbase.fonts[FONT_EMPH],
471 ret.ftitle.fonts[FONT_NORMAL],
472 ret.ftitle.fonts[FONT_EMPH],
473 ret.fchapter.fonts[FONT_NORMAL],
474 ret.fchapter.fonts[FONT_EMPH], NULL) &&
475 fonts_ok(ret.rquote,
476 ret.fbase.fonts[FONT_NORMAL],
477 ret.fbase.fonts[FONT_EMPH],
478 ret.ftitle.fonts[FONT_NORMAL],
479 ret.ftitle.fonts[FONT_EMPH],
480 ret.fchapter.fonts[FONT_NORMAL],
481 ret.fchapter.fonts[FONT_EMPH], NULL)) {
482 for (n = 0; n < ret.nfsect; n++)
483 if (!fonts_ok(ret.lquote,
484 ret.fsect[n].fonts[FONT_NORMAL],
485 ret.fsect[n].fonts[FONT_EMPH], NULL) ||
486 !fonts_ok(ret.rquote,
487 ret.fsect[n].fonts[FONT_NORMAL],
488 ret.fsect[n].fonts[FONT_EMPH], NULL))
489 break;
490 if (n == ret.nfsect)
491 break;
492 }
493 ret.lquote = uadv(ret.rquote);
494 ret.rquote = uadv(ret.lquote);
495 }
496
497 /* The bullet character only needs to be supported in the normal body
498 * font (not even in italics). */
499 while (*ret.bullet && *uadv(ret.bullet) &&
500 !fonts_ok(ret.bullet, ret.fbase.fonts[FONT_NORMAL], NULL))
501 ret.bullet = uadv(ret.bullet);
502
503 return ret;
504 }
505
506 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
507 indexdata *idx) {
508 paragraph *p;
509 document *doc;
510 int indent, used_contents;
511 para_data *pdata, *firstpara = NULL, *lastpara = NULL;
512 para_data *firstcont, *lastcont;
513 line_data *firstline, *lastline, *firstcontline, *lastcontline;
514 page_data *pages;
515 font_list *fontlist;
516 paper_conf *conf, ourconf;
517 int has_index;
518 int pagenum;
519 paragraph index_placeholder_para;
520 page_data *first_index_page;
521
522 init_std_fonts();
523 fontlist = snew(font_list);
524 fontlist->head = fontlist->tail = NULL;
525
526 ourconf = paper_configure(sourceform, fontlist);
527 conf = &ourconf;
528
529 /*
530 * Set up a data structure to collect page numbers for each
531 * index entry.
532 */
533 {
534 int i;
535 indexentry *entry;
536
537 has_index = FALSE;
538
539 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
540 paper_idx *pi = snew(paper_idx);
541
542 has_index = TRUE;
543
544 pi->words = pi->lastword = NULL;
545 pi->lastpage = NULL;
546
547 entry->backend_data = pi;
548 }
549 }
550
551 /*
552 * Format the contents entry for each heading.
553 */
554 {
555 word *contents_title;
556 contents_title = fake_word(conf->contents_text);
557
558 firstcont = make_para_data(para_UnnumberedChapter, 0, 0, 0,
559 NULL, NULL, contents_title, conf);
560 lastcont = firstcont;
561 lastcont->next = NULL;
562 firstcontline = firstcont->first;
563 lastcontline = lastcont->last;
564 for (p = sourceform; p; p = p->next) {
565 word *words;
566 int indent;
567
568 switch (p->type) {
569 case para_Chapter:
570 case para_Appendix:
571 case para_UnnumberedChapter:
572 case para_Heading:
573 case para_Subsect:
574 switch (p->type) {
575 case para_Chapter:
576 case para_Appendix:
577 words = prepare_contents_title(p->kwtext, L": ", p->words);
578 indent = 0;
579 break;
580 case para_UnnumberedChapter:
581 words = prepare_contents_title(NULL, NULL, p->words);
582 indent = 0;
583 break;
584 case para_Heading:
585 case para_Subsect:
586 words = prepare_contents_title(p->kwtext2, L" ", p->words);
587 indent = (p->aux + 1) * conf->contents_indent_step;
588 break;
589 }
590 pdata = make_para_data(para_Normal, p->aux, indent,
591 conf->contents_margin,
592 NULL, NULL, words, conf);
593 pdata->next = NULL;
594 pdata->contents_entry = p;
595 lastcont->next = pdata;
596 lastcont = pdata;
597
598 /*
599 * Link all contents line structures together into
600 * a big list.
601 */
602 if (pdata->first) {
603 if (lastcontline) {
604 lastcontline->next = pdata->first;
605 pdata->first->prev = lastcontline;
606 } else {
607 firstcontline = pdata->first;
608 pdata->first->prev = NULL;
609 }
610 lastcontline = pdata->last;
611 lastcontline->next = NULL;
612 }
613
614 break;
615 }
616 }
617
618 /*
619 * And one extra one, for the index.
620 */
621 if (has_index) {
622 pdata = make_para_data(para_Normal, 0, 0,
623 conf->contents_margin,
624 NULL, NULL,
625 fake_word(conf->index_text), conf);
626 pdata->next = NULL;
627 pdata->contents_entry = &index_placeholder_para;
628 lastcont->next = pdata;
629 lastcont = pdata;
630
631 if (pdata->first) {
632 if (lastcontline) {
633 lastcontline->next = pdata->first;
634 pdata->first->prev = lastcontline;
635 } else {
636 firstcontline = pdata->first;
637 pdata->first->prev = NULL;
638 }
639 lastcontline = pdata->last;
640 lastcontline->next = NULL;
641 }
642 }
643 }
644
645 /*
646 * Do the main paragraph formatting.
647 */
648 indent = 0;
649 used_contents = FALSE;
650 firstline = lastline = NULL;
651 for (p = sourceform; p; p = p->next) {
652 p->private_data = NULL;
653
654 switch (p->type) {
655 /*
656 * These paragraph types are either invisible or don't
657 * define text in the normal sense. Either way, they
658 * don't require wrapping.
659 */
660 case para_IM:
661 case para_BR:
662 case para_Biblio:
663 case para_NotParaType:
664 case para_Config:
665 case para_VersionID:
666 case para_NoCite:
667 break;
668
669 /*
670 * These paragraph types don't require wrapping, but
671 * they do affect the line width to which we wrap the
672 * rest of the paragraphs, so we need to pay attention.
673 */
674 case para_LcontPush:
675 indent += conf->indent_list; break;
676 case para_LcontPop:
677 indent -= conf->indent_list; assert(indent >= 0); break;
678 case para_QuotePush:
679 indent += conf->indent_quote; break;
680 case para_QuotePop:
681 indent -= conf->indent_quote; assert(indent >= 0); break;
682
683 /*
684 * This paragraph type is special. Process it
685 * specially.
686 */
687 case para_Code:
688 pdata = code_paragraph(indent, p->words, conf);
689 p->private_data = pdata;
690 if (pdata->first != pdata->last) {
691 pdata->first->penalty_after += 100000;
692 pdata->last->penalty_before += 100000;
693 }
694 break;
695
696 /*
697 * This paragraph is also special.
698 */
699 case para_Rule:
700 pdata = rule_paragraph(indent, conf);
701 p->private_data = pdata;
702 break;
703
704 /*
705 * All of these paragraph types require wrapping in the
706 * ordinary way. So we must supply a set of fonts, a
707 * line width and auxiliary information (e.g. bullet
708 * text) for each one.
709 */
710 case para_Chapter:
711 case para_Appendix:
712 case para_UnnumberedChapter:
713 case para_Heading:
714 case para_Subsect:
715 case para_Normal:
716 case para_BiblioCited:
717 case para_Bullet:
718 case para_NumberedList:
719 case para_DescribedThing:
720 case para_Description:
721 case para_Copyright:
722 case para_Title:
723 pdata = make_para_data(p->type, p->aux, indent, 0,
724 p->kwtext, p->kwtext2, p->words, conf);
725
726 p->private_data = pdata;
727
728 break;
729 }
730
731 if (p->private_data) {
732 pdata = (para_data *)p->private_data;
733
734 /*
735 * If this is the first non-title heading, we link the
736 * contents section in before it.
737 */
738 if (!used_contents && pdata->outline_level > 0) {
739 used_contents = TRUE;
740 if (lastpara)
741 lastpara->next = firstcont;
742 else
743 firstpara = firstcont;
744 lastpara = lastcont;
745 assert(lastpara->next == NULL);
746
747 if (lastline) {
748 lastline->next = firstcontline;
749 firstcontline->prev = lastline;
750 } else {
751 firstline = firstcontline;
752 firstcontline->prev = NULL;
753 }
754 assert(lastcontline != NULL);
755 lastline = lastcontline;
756 lastline->next = NULL;
757 }
758
759 /*
760 * Link all line structures together into a big list.
761 */
762 if (pdata->first) {
763 if (lastline) {
764 lastline->next = pdata->first;
765 pdata->first->prev = lastline;
766 } else {
767 firstline = pdata->first;
768 pdata->first->prev = NULL;
769 }
770 lastline = pdata->last;
771 lastline->next = NULL;
772 }
773
774 /*
775 * Link all paragraph structures together similarly.
776 */
777 pdata->next = NULL;
778 if (lastpara)
779 lastpara->next = pdata;
780 else
781 firstpara = pdata;
782 lastpara = pdata;
783 }
784 }
785
786 /*
787 * Now we have an enormous linked list of every line of text in
788 * the document. Break it up into pages.
789 */
790 pages = page_breaks(firstline, lastline, conf->page_height, 0, 0);
791
792 /*
793 * Number the pages.
794 */
795 {
796 char buf[40];
797 page_data *page;
798
799 pagenum = 0;
800
801 for (page = pages; page; page = page->next) {
802 sprintf(buf, "%d", ++pagenum);
803 page->number = ufroma_dup(buf, CS_ASCII);
804 }
805
806 if (has_index) {
807 first_index_page = snew(page_data);
808 first_index_page->next = first_index_page->prev = NULL;
809 first_index_page->first_line = NULL;
810 first_index_page->last_line = NULL;
811 first_index_page->first_text = first_index_page->last_text = NULL;
812 first_index_page->first_xref = first_index_page->last_xref = NULL;
813 first_index_page->first_rect = first_index_page->last_rect = NULL;
814
815 /* And don't forget the as-yet-uncreated index. */
816 sprintf(buf, "%d", ++pagenum);
817 first_index_page->number = ufroma_dup(buf, CS_ASCII);
818 }
819 }
820
821 /*
822 * Now we're ready to actually lay out the pages. We do this by
823 * looping over _paragraphs_, since we may need to track cross-
824 * references between lines and even across pages.
825 */
826 for (pdata = firstpara; pdata; pdata = pdata->next)
827 render_para(pdata, conf, keywords, idx,
828 &index_placeholder_para, first_index_page);
829
830 /*
831 * Now we've laid out the main body pages, we should have
832 * acquired a full set of page numbers for the index.
833 */
834 if (has_index) {
835 int i;
836 indexentry *entry;
837 word *index_title;
838 para_data *firstidx, *lastidx;
839 line_data *firstidxline, *lastidxline, *ldata;
840 page_data *ipages, *ipages2, *page;
841
842 /*
843 * Create a set of paragraphs for the index.
844 */
845 index_title = fake_word(conf->index_text);
846
847 firstidx = make_para_data(para_UnnumberedChapter, 0, 0, 0,
848 NULL, NULL, index_title, conf);
849 lastidx = firstidx;
850 lastidx->next = NULL;
851 firstidxline = firstidx->first;
852 lastidxline = lastidx->last;
853 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
854 paper_idx *pi = (paper_idx *)entry->backend_data;
855 para_data *text, *pages;
856
857 if (!pi->words)
858 continue;
859
860 text = make_para_data(para_Normal, 0, 0,
861 conf->base_width - conf->index_colwidth,
862 NULL, NULL, entry->text, conf);
863
864 pages = make_para_data(para_Normal, 0, 0,
865 conf->base_width - conf->index_colwidth,
866 NULL, NULL, pi->words, conf);
867
868 text->justification = LEFT;
869 pages->justification = RIGHT;
870 text->last->space_after = pages->first->space_before =
871 conf->base_leading / 2;
872
873 pages->last->space_after = text->first->space_before =
874 conf->base_leading;
875
876 assert(text->first);
877 assert(pages->first);
878 assert(lastidxline);
879 assert(lastidx);
880
881 /*
882 * If feasible, fold the two halves of the index entry
883 * together.
884 */
885 if (text->last->real_shortfall + pages->first->real_shortfall >
886 conf->index_colwidth + conf->index_minsep) {
887 text->last->space_after = -1;
888 pages->first->space_before = -pages->first->line_height+1;
889 }
890
891 lastidx->next = text;
892 text->next = pages;
893 pages->next = NULL;
894 lastidx = pages;
895
896 /*
897 * Link all index line structures together into
898 * a big list.
899 */
900 text->last->next = pages->first;
901 pages->first->prev = text->last;
902
903 lastidxline->next = text->first;
904 text->first->prev = lastidxline;
905
906 lastidxline = pages->last;
907
908 /*
909 * Breaking an index entry anywhere is so bad that I
910 * think I'm going to forbid it totally.
911 */
912 for (ldata = text->first; ldata && ldata->next;
913 ldata = ldata->next) {
914 ldata->next->space_before += ldata->space_after + 1;
915 ldata->space_after = -1;
916 }
917 }
918
919 /*
920 * Now break the index into pages.
921 */
922 ipages = page_breaks(firstidxline, firstidxline, conf->page_height,
923 0, 0);
924 ipages2 = page_breaks(firstidxline->next, lastidxline,
925 conf->page_height,
926 conf->index_cols,
927 firstidxline->space_before +
928 firstidxline->line_height +
929 firstidxline->space_after);
930
931 /*
932 * This will have put each _column_ of the index on a
933 * separate page, which isn't what we want. Fold the pages
934 * back together.
935 */
936 page = ipages2;
937 while (page) {
938 int i;
939
940 for (i = 1; i < conf->index_cols; i++)
941 if (page->next) {
942 page_data *tpage;
943
944 fold_into_page(page, page->next,
945 i * (conf->index_colwidth +
946 conf->index_gutter));
947 tpage = page->next;
948 page->next = page->next->next;
949 if (page->next)
950 page->next->prev = page;
951 sfree(tpage);
952 }
953
954 page = page->next;
955 }
956 /* Also fold the heading on to the same page as the index items. */
957 fold_into_page(ipages, ipages2, 0);
958 ipages->next = ipages2->next;
959 if (ipages->next)
960 ipages->next->prev = ipages;
961 sfree(ipages2);
962 fold_into_page(first_index_page, ipages, 0);
963 first_index_page->next = ipages->next;
964 if (first_index_page->next)
965 first_index_page->next->prev = first_index_page;
966 sfree(ipages);
967 ipages = first_index_page;
968
969 /*
970 * Number the index pages, except the already-numbered
971 * first one.
972 */
973 for (page = ipages->next; page; page = page->next) {
974 char buf[40];
975 sprintf(buf, "%d", ++pagenum);
976 page->number = ufroma_dup(buf, CS_ASCII);
977 }
978
979 /*
980 * Render the index pages.
981 */
982 for (pdata = firstidx; pdata; pdata = pdata->next)
983 render_para(pdata, conf, keywords, idx,
984 &index_placeholder_para, first_index_page);
985
986 /*
987 * Link the index page list on to the end of the main page
988 * list.
989 */
990 if (!pages)
991 pages = ipages;
992 else {
993 for (page = pages; page->next; page = page->next);
994 page->next = ipages;
995 }
996
997 /*
998 * Same with the paragraph list, which will cause the index
999 * to be mentioned in the document outline.
1000 */
1001 if (!firstpara)
1002 firstpara = firstidx;
1003 else
1004 lastpara->next = firstidx;
1005 lastpara = lastidx;
1006 }
1007
1008 /*
1009 * Draw the headers and footers.
1010 *
1011 * FIXME: this should be fully configurable, but for the moment
1012 * I'm just going to put in page numbers in the centre of a
1013 * footer and leave it at that.
1014 */
1015 {
1016 page_data *page;
1017
1018 for (page = pages; page; page = page->next) {
1019 int width;
1020
1021 width = conf->pagenum_fontsize *
1022 string_width(conf->fbase.fonts[FONT_NORMAL], page->number,
1023 NULL, 0);
1024
1025 render_string(page, conf->fbase.fonts[FONT_NORMAL],
1026 conf->pagenum_fontsize,
1027 conf->left_margin + (conf->base_width - width)/2,
1028 conf->bottom_margin - conf->footer_distance,
1029 page->number, 0);
1030 }
1031 }
1032
1033 /*
1034 * Start putting together the overall document structure we're
1035 * going to return.
1036 */
1037 doc = snew(document);
1038 doc->fonts = fontlist;
1039 doc->pages = pages;
1040 doc->paper_width = conf->paper_width;
1041 doc->paper_height = conf->paper_height;
1042
1043 /*
1044 * Collect the section heading paragraphs into a document
1045 * outline. This is slightly fiddly because the Title paragraph
1046 * isn't required to be at the start, although all the others
1047 * must be in order.
1048 */
1049 {
1050 int osize = 20;
1051
1052 doc->outline_elements = snewn(osize, outline_element);
1053 doc->n_outline_elements = 0;
1054
1055 /* First find the title. */
1056 for (pdata = firstpara; pdata; pdata = pdata->next) {
1057 if (pdata->outline_level == 0) {
1058 doc->outline_elements[0].level = 0;
1059 doc->outline_elements[0].pdata = pdata;
1060 doc->n_outline_elements++;
1061 break;
1062 }
1063 }
1064
1065 /* Then collect the rest. */
1066 for (pdata = firstpara; pdata; pdata = pdata->next) {
1067 if (pdata->outline_level > 0) {
1068 if (doc->n_outline_elements >= osize) {
1069 osize += 20;
1070 doc->outline_elements =
1071 sresize(doc->outline_elements, osize, outline_element);
1072 }
1073
1074 doc->outline_elements[doc->n_outline_elements].level =
1075 pdata->outline_level;
1076 doc->outline_elements[doc->n_outline_elements].pdata = pdata;
1077 doc->n_outline_elements++;
1078 }
1079 }
1080 }
1081
1082 return doc;
1083 }
1084
1085 static void setfont(para_data *p, font_cfg *f) {
1086 int i;
1087
1088 for (i = 0; i < NFONTS; i++) {
1089 p->fonts[i] = f->fonts[i];
1090 p->sizes[i] = f->font_size;
1091 }
1092 }
1093
1094 static para_data *make_para_data(int ptype, int paux, int indent, int rmargin,
1095 word *pkwtext, word *pkwtext2, word *pwords,
1096 paper_conf *conf)
1097 {
1098 para_data *pdata;
1099 line_data *ldata;
1100 int extra_indent, firstline_indent, aux_indent;
1101 word *aux, *aux2;
1102
1103 pdata = snew(para_data);
1104 pdata->outline_level = -1;
1105 pdata->outline_title = NULL;
1106 pdata->rect_type = RECT_NONE;
1107 pdata->contents_entry = NULL;
1108 pdata->justification = JUST;
1109
1110 /*
1111 * Choose fonts for this paragraph.
1112 */
1113 switch (ptype) {
1114 case para_Title:
1115 setfont(pdata, &conf->ftitle);
1116 pdata->outline_level = 0;
1117 break;
1118
1119 case para_Chapter:
1120 case para_Appendix:
1121 case para_UnnumberedChapter:
1122 setfont(pdata, &conf->fchapter);
1123 pdata->outline_level = 1;
1124 break;
1125
1126 case para_Heading:
1127 case para_Subsect:
1128 setfont(pdata,
1129 &conf->fsect[paux >= conf->nfsect ? conf->nfsect - 1 : paux]);
1130 pdata->outline_level = 2 + paux;
1131 break;
1132
1133 case para_Normal:
1134 case para_BiblioCited:
1135 case para_Bullet:
1136 case para_NumberedList:
1137 case para_DescribedThing:
1138 case para_Description:
1139 case para_Copyright:
1140 setfont(pdata, &conf->fbase);
1141 break;
1142 }
1143
1144 /*
1145 * Also select an indentation level depending on the
1146 * paragraph type (list paragraphs other than
1147 * para_DescribedThing need extra indent).
1148 *
1149 * (FIXME: Perhaps at some point we might even arrange
1150 * for the user to be able to request indented first
1151 * lines in paragraphs.)
1152 */
1153 if (ptype == para_Bullet ||
1154 ptype == para_NumberedList ||
1155 ptype == para_Description) {
1156 extra_indent = firstline_indent = conf->indent_list;
1157 } else {
1158 extra_indent = firstline_indent = 0;
1159 }
1160
1161 /*
1162 * Find the auxiliary text for this paragraph.
1163 */
1164 aux = aux2 = NULL;
1165 aux_indent = 0;
1166
1167 switch (ptype) {
1168 case para_Chapter:
1169 case para_Appendix:
1170 case para_Heading:
1171 case para_Subsect:
1172 /*
1173 * For some heading styles (FIXME: be able to
1174 * configure which), the auxiliary text contains
1175 * the chapter number and is arranged to be
1176 * right-aligned a few points left of the primary
1177 * margin. For other styles, the auxiliary text is
1178 * the full chapter _name_ and takes up space
1179 * within the (wrapped) chapter title, meaning that
1180 * we must move the first line indent over to make
1181 * space for it.
1182 */
1183 if (ptype == para_Heading || ptype == para_Subsect) {
1184 int len;
1185
1186 aux = pkwtext2;
1187 len = paper_width_simple(pdata, pkwtext2, conf);
1188 aux_indent = -len - conf->sect_num_left_space;
1189
1190 pdata->outline_title =
1191 prepare_outline_title(pkwtext2, L" ", pwords);
1192 } else {
1193 aux = pkwtext;
1194 aux2 = fake_word(L": ");
1195 aux_indent = 0;
1196
1197 firstline_indent += paper_width_simple(pdata, aux, conf);
1198 firstline_indent += paper_width_simple(pdata, aux2, conf);
1199
1200 pdata->outline_title =
1201 prepare_outline_title(pkwtext, L": ", pwords);
1202 }
1203 break;
1204
1205 case para_Bullet:
1206 /*
1207 * Auxiliary text consisting of a bullet.
1208 */
1209 aux = fake_word(conf->bullet);
1210 aux_indent = indent + conf->indent_list_bullet;
1211 break;
1212
1213 case para_NumberedList:
1214 /*
1215 * Auxiliary text consisting of the number followed
1216 * by a (FIXME: configurable) full stop.
1217 */
1218 aux = pkwtext;
1219 aux2 = fake_word(L".");
1220 aux_indent = indent + conf->indent_list_bullet;
1221 break;
1222
1223 case para_BiblioCited:
1224 /*
1225 * Auxiliary text consisting of the bibliography
1226 * reference text, and a trailing space.
1227 */
1228 aux = pkwtext;
1229 aux2 = fake_word(L" ");
1230 aux_indent = indent;
1231 firstline_indent += paper_width_simple(pdata, aux, conf);
1232 firstline_indent += paper_width_simple(pdata, aux2, conf);
1233 break;
1234 }
1235
1236 if (pdata->outline_level >= 0 && !pdata->outline_title) {
1237 pdata->outline_title =
1238 prepare_outline_title(NULL, NULL, pwords);
1239 }
1240
1241 wrap_paragraph(pdata, pwords, conf->base_width - rmargin,
1242 indent + firstline_indent,
1243 indent + extra_indent, conf);
1244
1245 pdata->first->aux_text = aux;
1246 pdata->first->aux_text_2 = aux2;
1247 pdata->first->aux_left_indent = aux_indent;
1248
1249 /*
1250 * Line breaking penalties.
1251 */
1252 switch (ptype) {
1253 case para_Chapter:
1254 case para_Appendix:
1255 case para_Heading:
1256 case para_Subsect:
1257 case para_UnnumberedChapter:
1258 /*
1259 * Fixed and large penalty for breaking straight
1260 * after a heading; corresponding bonus for
1261 * breaking straight before.
1262 */
1263 pdata->first->penalty_before = -500000;
1264 pdata->last->penalty_after = 500000;
1265 for (ldata = pdata->first; ldata; ldata = ldata->next)
1266 ldata->penalty_after = 500000;
1267 break;
1268
1269 case para_DescribedThing:
1270 /*
1271 * This is treated a bit like a small heading:
1272 * there's a penalty for breaking after it (i.e.
1273 * between it and its description), and a bonus for
1274 * breaking before it (actually _between_ list
1275 * items).
1276 */
1277 pdata->first->penalty_before = -200000;
1278 pdata->last->penalty_after = 200000;
1279 break;
1280
1281 default:
1282 /*
1283 * Most paragraph types: widow/orphan control by
1284 * discouraging breaking one line from the end of
1285 * any paragraph.
1286 */
1287 if (pdata->first != pdata->last) {
1288 pdata->first->penalty_after = 100000;
1289 pdata->last->penalty_before = 100000;
1290 }
1291 break;
1292 }
1293
1294 standard_line_spacing(pdata, conf);
1295
1296 /*
1297 * Some kinds of section heading require a page break before
1298 * them and an underline after.
1299 */
1300 if (ptype == para_Title ||
1301 ptype == para_Chapter ||
1302 ptype == para_Appendix ||
1303 ptype == para_UnnumberedChapter) {
1304 pdata->first->page_break = TRUE;
1305 pdata->first->space_before = conf->chapter_top_space;
1306 pdata->last->space_after +=
1307 (conf->chapter_underline_depth +
1308 conf->chapter_underline_thickness);
1309 pdata->rect_type = RECT_CHAPTER_UNDERLINE;
1310 }
1311
1312 return pdata;
1313 }
1314
1315 static void standard_line_spacing(para_data *pdata, paper_conf *conf)
1316 {
1317 line_data *ldata;
1318
1319 /*
1320 * Set the line spacing for each line in this paragraph.
1321 */
1322 for (ldata = pdata->first; ldata; ldata = ldata->next) {
1323 if (ldata == pdata->first)
1324 ldata->space_before = conf->base_para_spacing / 2;
1325 else
1326 ldata->space_before = conf->base_leading / 2;
1327 if (ldata == pdata->last)
1328 ldata->space_after = conf->base_para_spacing / 2;
1329 else
1330 ldata->space_after = conf->base_leading / 2;
1331 ldata->page_break = FALSE;
1332 }
1333 }
1334
1335 static font_encoding *new_font_encoding(font_data *font)
1336 {
1337 font_encoding *fe;
1338 int i;
1339
1340 fe = snew(font_encoding);
1341 fe->next = NULL;
1342
1343 if (font->list->tail)
1344 font->list->tail->next = fe;
1345 else
1346 font->list->head = fe;
1347 font->list->tail = fe;
1348
1349 fe->font = font;
1350 fe->free_pos = 0x21;
1351
1352 for (i = 0; i < 256; i++) {
1353 fe->vector[i] = NULL;
1354 fe->indices[i] = -1;
1355 fe->to_unicode[i] = 0xFFFF;
1356 }
1357
1358 return fe;
1359 }
1360
1361 int kern_cmp(void *a, void *b)
1362 {
1363 kern_pair const *ka = a, *kb = b;
1364
1365 if (ka->left < kb->left)
1366 return -1;
1367 if (ka->left > kb->left)
1368 return 1;
1369 if (ka->right < kb->right)
1370 return -1;
1371 if (ka->right > kb->right)
1372 return 1;
1373 return 0;
1374 }
1375
1376 int lig_cmp(void *a, void *b)
1377 {
1378 ligature const *la = a, *lb = b;
1379
1380 if (la->left < lb->left)
1381 return -1;
1382 if (la->left > lb->left)
1383 return 1;
1384 if (la->right < lb->right)
1385 return -1;
1386 if (la->right > lb->right)
1387 return 1;
1388 return 0;
1389 }
1390
1391 /* This wouldn't be necessary if C had closures. */
1392 static font_info *glyph_cmp_fi;
1393
1394 static int glyph_cmp(void const *a, void const *b)
1395 {
1396 return strcmp(glyph_cmp_fi->glyphs[*(unsigned short *)a],
1397 glyph_cmp_fi->glyphs[*(unsigned short *)b]);
1398 }
1399
1400 /*
1401 * Set up the glyphsbyname index for a font.
1402 */
1403 void font_index_glyphs(font_info *fi) {
1404 int i;
1405
1406 fi->glyphsbyname = snewn(fi->nglyphs, unsigned short);
1407 for (i = 0; i < fi->nglyphs; i++)
1408 fi->glyphsbyname[i] = i;
1409 glyph_cmp_fi = fi;
1410 qsort(fi->glyphsbyname, fi->nglyphs, sizeof(fi->glyphsbyname[0]),
1411 glyph_cmp);
1412 }
1413
1414 int find_glyph(font_info const *fi, char const *name) {
1415 int i, j, k, r;
1416
1417 i = -1;
1418 j = fi->nglyphs;
1419 while (j-i > 1) {
1420 k = (i + j) / 2;
1421 r = strcmp(fi->glyphs[fi->glyphsbyname[k]], name);
1422 if (r == 0)
1423 return fi->glyphsbyname[k];
1424 else if (r > 0)
1425 j = k;
1426 else
1427 i = k;
1428 }
1429 return -1;
1430 }
1431
1432 static font_data *make_std_font(font_list *fontlist, char const *name)
1433 {
1434 int nglyphs;
1435 font_info const *fi;
1436 font_data *f;
1437 font_encoding *fe;
1438 int i;
1439
1440 for (fe = fontlist->head; fe; fe = fe->next)
1441 if (strcmp(fe->font->info->name, name) == 0)
1442 return fe->font;
1443
1444 for (fi = all_fonts; fi; fi = fi->next)
1445 if (strcmp(fi->name, name) == 0) break;
1446 if (!fi) return NULL;
1447
1448 f = snew(font_data);
1449
1450 f->list = fontlist;
1451 f->info = fi;
1452 nglyphs = f->info->nglyphs;
1453 f->subfont_map = snewn(nglyphs, subfont_map_entry);
1454
1455 /*
1456 * Our first subfont will contain all of US-ASCII. This isn't
1457 * really necessary - we could just create custom subfonts
1458 * precisely as the whim of render_string dictated - but
1459 * instinct suggests that it might be nice to have the text in
1460 * the output files look _marginally_ recognisable.
1461 */
1462 fe = new_font_encoding(f);
1463 fe->free_pos = 0xA1; /* only the top half is free */
1464 f->latest_subfont = fe;
1465
1466 for (i = 0; i < nglyphs; i++) {
1467 wchar_t ucs;
1468 ucs = ps_glyph_to_unicode(f->info->glyphs[i]);
1469 if (ucs >= 0x20 && ucs <= 0x7E) {
1470 fe->vector[ucs] = f->info->glyphs[i];
1471 fe->indices[ucs] = i;
1472 fe->to_unicode[ucs] = ucs;
1473 f->subfont_map[i].subfont = fe;
1474 f->subfont_map[i].position = ucs;
1475 } else {
1476 /*
1477 * This character is not yet assigned to a subfont.
1478 */
1479 f->subfont_map[i].subfont = NULL;
1480 f->subfont_map[i].position = 0;
1481 }
1482 }
1483
1484 return f;
1485 }
1486
1487 /* NB: arguments are glyph numbers from font->bmp. */
1488 static int find_kern(font_data *font, int lindex, int rindex)
1489 {
1490 kern_pair wantkp;
1491 kern_pair const *kp;
1492
1493 if (lindex == 0xFFFF || rindex == 0xFFFF)
1494 return 0;
1495 wantkp.left = lindex;
1496 wantkp.right = rindex;
1497 kp = find234(font->info->kerns, &wantkp, NULL);
1498 if (kp == NULL)
1499 return 0;
1500 return kp->kern;
1501 }
1502
1503 static int find_lig(font_data *font, int lindex, int rindex)
1504 {
1505 ligature wantlig;
1506 ligature const *lig;
1507
1508 if (lindex == 0xFFFF || rindex == 0xFFFF)
1509 return 0xFFFF;
1510 wantlig.left = lindex;
1511 wantlig.right = rindex;
1512 lig = find234(font->info->ligs, &wantlig, NULL);
1513 if (lig == NULL)
1514 return 0xFFFF;
1515 return lig->lig;
1516 }
1517
1518 static int utoglyph(font_info const *fi, wchar_t u) {
1519 return (u < 0 || u > 0xFFFF ? 0xFFFF : fi->bmp[u]);
1520 }
1521
1522 static int string_width(font_data *font, wchar_t const *string, int *errs,
1523 unsigned flags)
1524 {
1525 int width = 0;
1526 int nindex, index, oindex, lindex;
1527
1528 if (errs)
1529 *errs = 0;
1530
1531 oindex = 0xFFFF;
1532 index = utoglyph(font->info, *string);
1533 for (; *string; string++) {
1534 nindex = utoglyph(font->info, string[1]);
1535
1536 if (index == 0xFFFF) {
1537 if (errs)
1538 *errs = 1;
1539 } else {
1540 if (!(flags & RS_NOLIG) &&
1541 (lindex = find_lig(font, index, nindex)) != 0xFFFF) {
1542 index = lindex;
1543 continue;
1544 }
1545 width += find_kern(font, oindex, index) +
1546 font->info->widths[index];
1547 }
1548 oindex = index;
1549 index = nindex;
1550 }
1551
1552 return width;
1553 }
1554
1555 static int paper_width_internal(void *vctx, word *word, int *nspaces);
1556
1557 struct paper_width_ctx {
1558 int minspacewidth;
1559 para_data *pdata;
1560 paper_conf *conf;
1561 };
1562
1563 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
1564 int w = 0;
1565 while (text && text != end) {
1566 w += paper_width_internal(vctx, text, nspaces);
1567 text = text->next;
1568 }
1569 return w;
1570 }
1571
1572 static int paper_width_internal(void *vctx, word *word, int *nspaces)
1573 {
1574 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
1575 int style, type, findex, width, errs;
1576 wchar_t *str;
1577 unsigned flags = 0;
1578
1579 switch (word->type) {
1580 case word_HyperLink:
1581 case word_HyperEnd:
1582 case word_UpperXref:
1583 case word_LowerXref:
1584 case word_PageXref:
1585 case word_XrefEnd:
1586 case word_IndexRef:
1587 return 0;
1588 }
1589
1590 style = towordstyle(word->type);
1591 type = removeattr(word->type);
1592
1593 findex = (style == word_Normal ? FONT_NORMAL :
1594 style == word_Emph ? FONT_EMPH :
1595 FONT_CODE);
1596
1597 if (style == word_Code || style == word_WeakCode) flags |= RS_NOLIG;
1598
1599 if (type == word_Normal) {
1600 str = word->text;
1601 } else if (type == word_WhiteSpace) {
1602 if (findex != FONT_CODE) {
1603 if (nspaces)
1604 (*nspaces)++;
1605 return ctx->minspacewidth;
1606 } else
1607 str = L" ";
1608 } else /* if (type == word_Quote) */ {
1609 if (word->aux == quote_Open)
1610 str = ctx->conf->lquote;
1611 else
1612 str = ctx->conf->rquote;
1613 }
1614
1615 width = string_width(ctx->pdata->fonts[findex], str, &errs, flags);
1616
1617 if (errs && word->alt)
1618 return paper_width_list(vctx, word->alt, NULL, nspaces);
1619 else
1620 return ctx->pdata->sizes[findex] * width;
1621 }
1622
1623 static int paper_width(void *vctx, word *word)
1624 {
1625 return paper_width_internal(vctx, word, NULL);
1626 }
1627
1628 static int paper_width_simple(para_data *pdata, word *text, paper_conf *conf)
1629 {
1630 struct paper_width_ctx ctx;
1631
1632 ctx.pdata = pdata;
1633 ctx.minspacewidth =
1634 (pdata->sizes[FONT_NORMAL] *
1635 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL, 0));
1636 ctx.conf = conf;
1637
1638 return paper_width_list(&ctx, text, NULL, NULL);
1639 }
1640
1641 static void wrap_paragraph(para_data *pdata, word *words,
1642 int w, int i1, int i2, paper_conf *conf)
1643 {
1644 wrappedline *wrapping, *p;
1645 int spacewidth;
1646 struct paper_width_ctx ctx;
1647 int line_height;
1648
1649 /*
1650 * We're going to need to store the line height in every line
1651 * structure we generate.
1652 */
1653 {
1654 int i;
1655 line_height = 0;
1656 for (i = 0; i < NFONTS; i++)
1657 if (line_height < pdata->sizes[i])
1658 line_height = pdata->sizes[i];
1659 line_height *= UNITS_PER_PT;
1660 }
1661
1662 spacewidth = (pdata->sizes[FONT_NORMAL] *
1663 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL, 0));
1664 if (spacewidth == 0) {
1665 /*
1666 * A font without a space?! Disturbing. I hope this never
1667 * comes up, but I'll make a random guess anyway and set my
1668 * space width to half the point size.
1669 */
1670 spacewidth = pdata->sizes[FONT_NORMAL] * UNITS_PER_PT / 2;
1671 }
1672
1673 /*
1674 * I'm going to set the _minimum_ space width to 3/5 of the
1675 * standard one, and use the standard one as the optimum.
1676 */
1677 ctx.minspacewidth = spacewidth * 3 / 5;
1678 ctx.pdata = pdata;
1679 ctx.conf = conf;
1680
1681 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
1682
1683 /*
1684 * Having done the wrapping, we now concoct a set of line_data
1685 * structures.
1686 */
1687 pdata->first = pdata->last = NULL;
1688
1689 for (p = wrapping; p; p = p->next) {
1690 line_data *ldata;
1691 word *wd;
1692 int len, wid, spaces;
1693
1694 ldata = snew(line_data);
1695
1696 ldata->pdata = pdata;
1697 ldata->first = p->begin;
1698 ldata->end = p->end;
1699 ldata->line_height = line_height;
1700
1701 ldata->xpos = (p == wrapping ? i1 : i2);
1702
1703 if (pdata->last) {
1704 pdata->last->next = ldata;
1705 ldata->prev = pdata->last;
1706 } else {
1707 pdata->first = ldata;
1708 ldata->prev = NULL;
1709 }
1710 ldata->next = NULL;
1711 pdata->last = ldata;
1712
1713 spaces = 0;
1714 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
1715 wid = (p == wrapping ? w - i1 : w - i2);
1716 wd = ldata->first;
1717
1718 ldata->hshortfall = wid - len;
1719 ldata->nspaces = spaces;
1720 /*
1721 * This tells us how much the space width needs to
1722 * change from _min_spacewidth. But we want to store
1723 * its difference from the _natural_ space width, to
1724 * make the text rendering easier.
1725 */
1726 ldata->hshortfall += ctx.minspacewidth * spaces;
1727 ldata->hshortfall -= spacewidth * spaces;
1728 ldata->real_shortfall = ldata->hshortfall;
1729 /*
1730 * Special case: on the last line of a paragraph, we
1731 * never stretch spaces.
1732 */
1733 if (ldata->hshortfall > 0 && !p->next)
1734 ldata->hshortfall = 0;
1735
1736 ldata->aux_text = NULL;
1737 ldata->aux_text_2 = NULL;
1738 ldata->aux_left_indent = 0;
1739 ldata->penalty_before = ldata->penalty_after = 0;
1740 }
1741
1742 }
1743
1744 static page_data *page_breaks(line_data *first, line_data *last,
1745 int page_height, int ncols, int headspace)
1746 {
1747 line_data *l, *m;
1748 page_data *ph, *pt;
1749 int n, n1, this_height;
1750
1751 /*
1752 * Page breaking is done by a close analogue of the optimal
1753 * paragraph wrapping algorithm used by wrap_para(). We work
1754 * backwards from the end of the document line by line; for
1755 * each line, we contemplate every possible number of lines we
1756 * could put on a page starting with that line, determine a
1757 * cost function for each one, add it to the pre-computed cost
1758 * function for optimally page-breaking everything after that
1759 * page, and pick the best option.
1760 *
1761 * This is made slightly more complex by the fact that we have
1762 * a multi-column index with a heading at the top of the
1763 * _first_ page, meaning that the first _ncols_ pages must have
1764 * a different length. Hence, we must do the wrapping ncols+1
1765 * times over, hypothetically trying to put every subsequence
1766 * on every possible page.
1767 *
1768 * Since my line_data structures are only used for this
1769 * purpose, I might as well just store the algorithm data
1770 * directly in them.
1771 */
1772
1773 for (l = last; l; l = l->prev) {
1774 l->bestcost = snewn(ncols+1, int);
1775 l->vshortfall = snewn(ncols+1, int);
1776 l->text = snewn(ncols+1, int);
1777 l->space = snewn(ncols+1, int);
1778 l->page_last = snewn(ncols+1, line_data *);
1779
1780 for (n = 0; n <= ncols; n++) {
1781 int minheight, text = 0, space = 0;
1782 int cost;
1783
1784 n1 = (n < ncols ? n+1 : ncols);
1785 if (n < ncols)
1786 this_height = page_height - headspace;
1787 else
1788 this_height = page_height;
1789
1790 l->bestcost[n] = -1;
1791 for (m = l; m; m = m->next) {
1792 if (m != l && m->page_break)
1793 break; /* we've gone as far as we can */
1794
1795 if (m != l) {
1796 if (m->prev->space_after > 0)
1797 space += m->prev->space_after;
1798 else
1799 text += m->prev->space_after;
1800 }
1801 if (m != l || m->page_break) {
1802 if (m->space_before > 0)
1803 space += m->space_before;
1804 else
1805 text += m->space_before;
1806 }
1807 text += m->line_height;
1808 minheight = text + space;
1809
1810 if (m != l && minheight > this_height)
1811 break;
1812
1813 /*
1814 * If the space after this paragraph is _negative_
1815 * (which means the next line is folded on to this
1816 * one, which happens in the index), we absolutely
1817 * cannot break here.
1818 */
1819 if (m->space_after >= 0) {
1820
1821 /*
1822 * Compute the cost of this arrangement, as the
1823 * square of the amount of wasted space on the
1824 * page. Exception: if this is the last page
1825 * before a mandatory break or the document
1826 * end, we don't penalise a large blank area.
1827 */
1828 if (m != last && m->next && !m->next->page_break)
1829 {
1830 int x = (this_height - minheight) / FUNITS_PER_PT *
1831 4096.0;
1832 int xf;
1833
1834 xf = x & 0xFF;
1835 x >>= 8;
1836
1837 cost = x*x;
1838 cost += (x * xf) >> 8;
1839 } else
1840 cost = 0;
1841
1842 if (m != last && m->next && !m->next->page_break) {
1843 cost += m->penalty_after;
1844 cost += m->next->penalty_before;
1845 }
1846
1847 if (m != last && m->next && !m->next->page_break)
1848 cost += m->next->bestcost[n1];
1849 if (l->bestcost[n] == -1 || l->bestcost[n] > cost) {
1850 /*
1851 * This is the best option yet for this
1852 * starting point.
1853 */
1854 l->bestcost[n] = cost;
1855 if (m != last && m->next && !m->next->page_break)
1856 l->vshortfall[n] = this_height - minheight;
1857 else
1858 l->vshortfall[n] = 0;
1859 l->text[n] = text;
1860 l->space[n] = space;
1861 l->page_last[n] = m;
1862 }
1863 }
1864
1865 if (m == last)
1866 break;
1867 }
1868 }
1869 }
1870
1871 /*
1872 * Now go through the line list forwards and assemble the
1873 * actual pages.
1874 */
1875 ph = pt = NULL;
1876
1877 l = first;
1878 n = 0;
1879 while (l) {
1880 page_data *page;
1881 int text, space, head;
1882
1883 page = snew(page_data);
1884 page->next = NULL;
1885 page->prev = pt;
1886 if (pt)
1887 pt->next = page;
1888 else
1889 ph = page;
1890 pt = page;
1891
1892 page->first_line = l;
1893 page->last_line = l->page_last[n];
1894
1895 page->first_text = page->last_text = NULL;
1896 page->first_xref = page->last_xref = NULL;
1897 page->first_rect = page->last_rect = NULL;
1898
1899 /*
1900 * Now assign a y-coordinate to each line on the page.
1901 */
1902 text = space = 0;
1903 head = (n < ncols ? headspace : 0);
1904 for (l = page->first_line; l; l = l->next) {
1905 if (l != page->first_line) {
1906 if (l->prev->space_after > 0)
1907 space += l->prev->space_after;
1908 else
1909 text += l->prev->space_after;
1910 }
1911 if (l != page->first_line || l->page_break) {
1912 if (l->space_before > 0)
1913 space += l->space_before;
1914 else
1915 text += l->space_before;
1916 }
1917 text += l->line_height;
1918
1919 l->page = page;
1920 l->ypos = text + space + head;
1921 if (page->first_line->space[n]) {
1922 l->ypos += space * (float)page->first_line->vshortfall[n] /
1923 page->first_line->space[n];
1924 }
1925
1926 if (l == page->last_line)
1927 break;
1928 }
1929
1930 l = page->last_line;
1931 if (l == last)
1932 break;
1933 l = l->next;
1934
1935 n = (n < ncols ? n+1 : ncols);
1936 }
1937
1938 return ph;
1939 }
1940
1941 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
1942 {
1943 rect *r = snew(rect);
1944
1945 r->next = NULL;
1946 if (page->last_rect)
1947 page->last_rect->next = r;
1948 else
1949 page->first_rect = r;
1950 page->last_rect = r;
1951
1952 r->x = x;
1953 r->y = y;
1954 r->w = w;
1955 r->h = h;
1956 }
1957
1958 static void add_string_to_page(page_data *page, int x, int y,
1959 font_encoding *fe, int size, char *text,
1960 int width)
1961 {
1962 text_fragment *frag;
1963
1964 frag = snew(text_fragment);
1965 frag->next = NULL;
1966
1967 if (page->last_text)
1968 page->last_text->next = frag;
1969 else
1970 page->first_text = frag;
1971 page->last_text = frag;
1972
1973 frag->x = x;
1974 frag->y = y;
1975 frag->fe = fe;
1976 frag->fontsize = size;
1977 frag->text = dupstr(text);
1978 frag->width = width;
1979 }
1980
1981 /*
1982 * Returns the updated x coordinate.
1983 */
1984 static int render_string(page_data *page, font_data *font, int fontsize,
1985 int x, int y, wchar_t *str, unsigned flags)
1986 {
1987 char *text;
1988 int textpos, textwid, kern, nglyph, glyph, oglyph, lig;
1989 font_encoding *subfont = NULL, *sf;
1990
1991 text = snewn(1 + ustrlen(str), char);
1992 textpos = textwid = 0;
1993
1994 glyph = 0xFFFF;
1995 nglyph = utoglyph(font->info, *str);
1996 while (*str) {
1997 oglyph = glyph;
1998 glyph = nglyph;
1999 nglyph = utoglyph(font->info, str[1]);
2000
2001 if (glyph == 0xFFFF) {
2002 str++;
2003 continue; /* nothing more we can do here */
2004 }
2005
2006 if (!(flags & RS_NOLIG) &&
2007 (lig = find_lig(font, glyph, nglyph)) != 0xFFFF) {
2008 nglyph = lig;
2009 str++;
2010 continue;
2011 }
2012
2013 /*
2014 * Find which subfont this character is going in.
2015 */
2016 sf = font->subfont_map[glyph].subfont;
2017
2018 if (!sf) {
2019 int c;
2020
2021 /*
2022 * This character is not yet in a subfont. Assign one.
2023 */
2024 if (font->latest_subfont->free_pos >= 0x100)
2025 font->latest_subfont = new_font_encoding(font);
2026
2027 c = font->latest_subfont->free_pos++;
2028 if (font->latest_subfont->free_pos == 0x7F)
2029 font->latest_subfont->free_pos = 0xA1;
2030
2031 font->subfont_map[glyph].subfont = font->latest_subfont;
2032 font->subfont_map[glyph].position = c;
2033 font->latest_subfont->vector[c] = font->info->glyphs[glyph];
2034 font->latest_subfont->indices[c] = glyph;
2035 font->latest_subfont->to_unicode[c] = *str;
2036
2037 sf = font->latest_subfont;
2038 }
2039
2040 kern = find_kern(font, oglyph, glyph) * fontsize;
2041
2042 if (!subfont || sf != subfont || kern) {
2043 if (subfont) {
2044 text[textpos] = '\0';
2045 add_string_to_page(page, x, y, subfont, fontsize, text,
2046 textwid);
2047 x += textwid + kern;
2048 } else {
2049 assert(textpos == 0);
2050 }
2051 textpos = 0;
2052 textwid = 0;
2053 subfont = sf;
2054 }
2055
2056 text[textpos++] = font->subfont_map[glyph].position;
2057 textwid += font->info->widths[glyph] * fontsize;
2058
2059 str++;
2060 }
2061
2062 if (textpos > 0) {
2063 text[textpos] = '\0';
2064 add_string_to_page(page, x, y, subfont, fontsize, text, textwid);
2065 x += textwid;
2066 }
2067
2068 return x;
2069 }
2070
2071 /*
2072 * Returns the updated x coordinate.
2073 */
2074 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
2075 int x, int y, word *text, word *text_end, xref **xr,
2076 int shortfall, int nspaces, int *nspace,
2077 keywordlist *keywords, indexdata *idx, paper_conf *conf)
2078 {
2079 while (text && text != text_end) {
2080 int style, type, findex, errs;
2081 wchar_t *str;
2082 xref_dest dest;
2083 unsigned flags = 0;
2084
2085 switch (text->type) {
2086 /*
2087 * Start a cross-reference.
2088 */
2089 case word_HyperLink:
2090 case word_UpperXref:
2091 case word_LowerXref:
2092 case word_PageXref:
2093
2094 if (text->type == word_HyperLink) {
2095 dest.type = URL;
2096 dest.url = utoa_dup(text->text, CS_ASCII);
2097 dest.page = NULL;
2098 } else if (text->type == word_PageXref) {
2099 dest.type = PAGE;
2100 dest.url = NULL;
2101 dest.page = (page_data *)text->private_data;
2102 } else {
2103 keyword *kwl = kw_lookup(keywords, text->text);
2104 para_data *pdata;
2105
2106 if (kwl) {
2107 assert(kwl->para->private_data);
2108 pdata = (para_data *) kwl->para->private_data;
2109 dest.type = PAGE;
2110 dest.page = pdata->first->page;
2111 dest.url = NULL;
2112 } else {
2113 /*
2114 * Shouldn't happen, but *shrug*
2115 */
2116 dest.type = NONE;
2117 dest.page = NULL;
2118 dest.url = NULL;
2119 }
2120 }
2121 if (dest.type != NONE) {
2122 *xr = snew(xref);
2123 (*xr)->dest = dest; /* structure copy */
2124 if (page->last_xref)
2125 page->last_xref->next = *xr;
2126 else
2127 page->first_xref = *xr;
2128 page->last_xref = *xr;
2129 (*xr)->next = NULL;
2130
2131 /*
2132 * FIXME: Ideally we should have, and use, some
2133 * vertical font metric information here so that
2134 * our cross-ref rectangle can take account of
2135 * descenders and the font's cap height. This will
2136 * do for the moment, but it isn't ideal.
2137 */
2138 (*xr)->lx = (*xr)->rx = x;
2139 (*xr)->by = y;
2140 (*xr)->ty = y + ldata->line_height;
2141 }
2142 goto nextword;
2143
2144 /*
2145 * Finish extending a cross-reference box.
2146 */
2147 case word_HyperEnd:
2148 case word_XrefEnd:
2149 *xr = NULL;
2150 goto nextword;
2151
2152 /*
2153 * Add the current page number to the list of pages
2154 * referenced by an index entry.
2155 */
2156 case word_IndexRef:
2157 /*
2158 * We don't create index references in contents entries.
2159 */
2160 if (!pdata->contents_entry) {
2161 indextag *tag;
2162 int i;
2163
2164 tag = index_findtag(idx, text->text);
2165 if (!tag)
2166 goto nextword;
2167
2168 for (i = 0; i < tag->nrefs; i++) {
2169 indexentry *entry = tag->refs[i];
2170 paper_idx *pi = (paper_idx *)entry->backend_data;
2171
2172 /*
2173 * If the same index term is indexed twice
2174 * within the same section, we only want to
2175 * mention it once in the index.
2176 */
2177 if (pi->lastpage != page) {
2178 word **wp;
2179
2180 if (pi->lastword) {
2181 pi->lastword = pi->lastword->next =
2182 fake_word(L",");
2183 pi->lastword = pi->lastword->next =
2184 fake_space_word();
2185 wp = &pi->lastword->next;
2186 } else
2187 wp = &pi->words;
2188
2189 pi->lastword = *wp =
2190 fake_page_ref(page);
2191 pi->lastword = pi->lastword->next =
2192 fake_word(page->number);
2193 pi->lastword = pi->lastword->next =
2194 fake_end_ref();
2195 }
2196
2197 pi->lastpage = page;
2198 }
2199 }
2200 goto nextword;
2201 }
2202
2203 style = towordstyle(text->type);
2204 type = removeattr(text->type);
2205
2206 findex = (style == word_Normal ? FONT_NORMAL :
2207 style == word_Emph ? FONT_EMPH :
2208 FONT_CODE);
2209
2210 if (style == word_Code || style == word_WeakCode) flags |= RS_NOLIG;
2211
2212 if (type == word_Normal) {
2213 str = text->text;
2214 } else if (type == word_WhiteSpace) {
2215 x += pdata->sizes[findex] *
2216 string_width(pdata->fonts[findex], L" ", NULL, 0);
2217 if (nspaces && findex != FONT_CODE) {
2218 x += (*nspace+1) * shortfall / nspaces;
2219 x -= *nspace * shortfall / nspaces;
2220 (*nspace)++;
2221 }
2222 goto nextword;
2223 } else /* if (type == word_Quote) */ {
2224 if (text->aux == quote_Open)
2225 str = conf->lquote;
2226 else
2227 str = conf->rquote;
2228 }
2229
2230 (void) string_width(pdata->fonts[findex], str, &errs, flags);
2231
2232 if (errs && text->alt)
2233 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
2234 xr, shortfall, nspaces, nspace, keywords, idx,
2235 conf);
2236 else
2237 x = render_string(page, pdata->fonts[findex],
2238 pdata->sizes[findex], x, y, str, flags);
2239
2240 if (*xr)
2241 (*xr)->rx = x;
2242
2243 nextword:
2244 text = text->next;
2245 }
2246
2247 return x;
2248 }
2249
2250 /*
2251 * Returns the last x position used on the line.
2252 */
2253 static int render_line(line_data *ldata, int left_x, int top_y,
2254 xref_dest *dest, keywordlist *keywords, indexdata *idx,
2255 paper_conf *conf)
2256 {
2257 int nspace;
2258 xref *xr;
2259 int ret = 0;
2260
2261 if (ldata->aux_text) {
2262 int x;
2263 xr = NULL;
2264 nspace = 0;
2265 x = render_text(ldata->page, ldata->pdata, ldata,
2266 left_x + ldata->aux_left_indent,
2267 top_y - ldata->ypos,
2268 ldata->aux_text, NULL, &xr, 0, 0, &nspace,
2269 keywords, idx, conf);
2270 if (ldata->aux_text_2)
2271 render_text(ldata->page, ldata->pdata, ldata,
2272 x, top_y - ldata->ypos,
2273 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace,
2274 keywords, idx, conf);
2275 }
2276 nspace = 0;
2277
2278 if (ldata->first) {
2279 /*
2280 * There might be a cross-reference carried over from a
2281 * previous line.
2282 */
2283 if (dest->type != NONE) {
2284 xr = snew(xref);
2285 xr->next = NULL;
2286 xr->dest = *dest; /* structure copy */
2287 if (ldata->page->last_xref)
2288 ldata->page->last_xref->next = xr;
2289 else
2290 ldata->page->first_xref = xr;
2291 ldata->page->last_xref = xr;
2292 xr->lx = xr->rx = left_x + ldata->xpos;
2293 xr->by = top_y - ldata->ypos;
2294 xr->ty = top_y - ldata->ypos + ldata->line_height;
2295 } else
2296 xr = NULL;
2297
2298 {
2299 int extra_indent, shortfall, spaces;
2300 int just = ldata->pdata->justification;
2301
2302 /*
2303 * All forms of justification become JUST when we have
2304 * to squeeze the paragraph.
2305 */
2306 if (ldata->hshortfall < 0)
2307 just = JUST;
2308
2309 switch (just) {
2310 case JUST:
2311 shortfall = ldata->hshortfall;
2312 spaces = ldata->nspaces;
2313 extra_indent = 0;
2314 break;
2315 case LEFT:
2316 shortfall = spaces = extra_indent = 0;
2317 break;
2318 case RIGHT:
2319 shortfall = spaces = 0;
2320 extra_indent = ldata->real_shortfall;
2321 break;
2322 }
2323
2324 ret = render_text(ldata->page, ldata->pdata, ldata,
2325 left_x + ldata->xpos + extra_indent,
2326 top_y - ldata->ypos, ldata->first, ldata->end,
2327 &xr, shortfall, spaces, &nspace,
2328 keywords, idx, conf);
2329 }
2330
2331 if (xr) {
2332 /*
2333 * There's a cross-reference continued on to the next line.
2334 */
2335 *dest = xr->dest;
2336 } else
2337 dest->type = NONE;
2338 }
2339
2340 return ret;
2341 }
2342
2343 static void render_para(para_data *pdata, paper_conf *conf,
2344 keywordlist *keywords, indexdata *idx,
2345 paragraph *index_placeholder, page_data *index_page)
2346 {
2347 int last_x;
2348 xref *cxref;
2349 page_data *cxref_page;
2350 xref_dest dest;
2351 para_data *target;
2352 line_data *ldata;
2353
2354 dest.type = NONE;
2355 cxref = NULL;
2356 cxref_page = NULL;
2357
2358 for (ldata = pdata->first; ldata; ldata = ldata->next) {
2359 /*
2360 * If this is a contents entry, we expect to have a single
2361 * enormous cross-reference rectangle covering the whole
2362 * thing. (Unless, of course, it spans multiple pages.)
2363 */
2364 if (pdata->contents_entry && ldata->page != cxref_page) {
2365 cxref_page = ldata->page;
2366 cxref = snew(xref);
2367 cxref->next = NULL;
2368 cxref->dest.type = PAGE;
2369 if (pdata->contents_entry == index_placeholder) {
2370 cxref->dest.page = index_page;
2371 } else {
2372 assert(pdata->contents_entry->private_data);
2373 target = (para_data *)pdata->contents_entry->private_data;
2374 cxref->dest.page = target->first->page;
2375 }
2376 cxref->dest.url = NULL;
2377 if (ldata->page->last_xref)
2378 ldata->page->last_xref->next = cxref;
2379 else
2380 ldata->page->first_xref = cxref;
2381 ldata->page->last_xref = cxref;
2382 cxref->lx = conf->left_margin;
2383 cxref->rx = conf->paper_width - conf->right_margin;
2384 cxref->ty = conf->paper_height - conf->top_margin
2385 - ldata->ypos + ldata->line_height;
2386 }
2387 if (pdata->contents_entry) {
2388 assert(cxref != NULL);
2389 cxref->by = conf->paper_height - conf->top_margin
2390 - ldata->ypos;
2391 }
2392
2393 last_x = render_line(ldata, conf->left_margin,
2394 conf->paper_height - conf->top_margin,
2395 &dest, keywords, idx, conf);
2396 if (ldata == pdata->last)
2397 break;
2398 }
2399
2400 /*
2401 * If this is a contents entry, add leaders and a page
2402 * number.
2403 */
2404 if (pdata->contents_entry) {
2405 word *w;
2406 wchar_t *num;
2407 int wid;
2408 int x;
2409
2410 if (pdata->contents_entry == index_placeholder) {
2411 num = index_page->number;
2412 } else {
2413 assert(pdata->contents_entry->private_data);
2414 target = (para_data *)pdata->contents_entry->private_data;
2415 num = target->first->page->number;
2416 }
2417
2418 w = fake_word(num);
2419 wid = paper_width_simple(pdata, w, conf);
2420 sfree(w);
2421
2422 for (x = 0; x < conf->base_width; x += conf->leader_separation)
2423 if (x - conf->leader_separation > last_x - conf->left_margin &&
2424 x + conf->leader_separation < conf->base_width - wid)
2425 render_string(pdata->last->page,
2426 pdata->fonts[FONT_NORMAL],
2427 pdata->sizes[FONT_NORMAL],
2428 conf->left_margin + x,
2429 (conf->paper_height - conf->top_margin -
2430 pdata->last->ypos), L".", 0);
2431
2432 render_string(pdata->last->page,
2433 pdata->fonts[FONT_NORMAL],
2434 pdata->sizes[FONT_NORMAL],
2435 conf->paper_width - conf->right_margin - wid,
2436 (conf->paper_height - conf->top_margin -
2437 pdata->last->ypos), num, 0);
2438 }
2439
2440 /*
2441 * Render any rectangle (chapter title underline or rule)
2442 * that goes with this paragraph.
2443 */
2444 switch (pdata->rect_type) {
2445 case RECT_CHAPTER_UNDERLINE:
2446 add_rect_to_page(pdata->last->page,
2447 conf->left_margin,
2448 (conf->paper_height - conf->top_margin -
2449 pdata->last->ypos -
2450 conf->chapter_underline_depth),
2451 conf->base_width,
2452 conf->chapter_underline_thickness);
2453 break;
2454 case RECT_RULE:
2455 add_rect_to_page(pdata->first->page,
2456 conf->left_margin + pdata->first->xpos,
2457 (conf->paper_height - conf->top_margin -
2458 pdata->last->ypos -
2459 pdata->last->line_height),
2460 conf->base_width - pdata->first->xpos,
2461 pdata->last->line_height);
2462 break;
2463 default: /* placate gcc */
2464 break;
2465 }
2466 }
2467
2468 static para_data *code_paragraph(int indent, word *words, paper_conf *conf)
2469 {
2470 para_data *pdata = snew(para_data);
2471
2472 /*
2473 * For code paragraphs, I'm going to hack grievously and
2474 * pretend the three normal fonts are the three code paragraph
2475 * fonts.
2476 */
2477 setfont(pdata, &conf->fcode);
2478
2479 pdata->first = pdata->last = NULL;
2480 pdata->outline_level = -1;
2481 pdata->rect_type = RECT_NONE;
2482 pdata->contents_entry = NULL;
2483 pdata->justification = LEFT;
2484
2485 for (; words; words = words->next) {
2486 wchar_t *t, *e, *start;
2487 word *lhead = NULL, *ltail = NULL, *w;
2488 line_data *ldata;
2489 int prev = -1, curr;
2490
2491 t = words->text;
2492 if (words->next && words->next->type == word_Emph) {
2493 e = words->next->text;
2494 words = words->next;
2495 } else
2496 e = NULL;
2497
2498 start = t;
2499
2500 while (*start) {
2501 while (*t) {
2502 if (!e || !*e)
2503 curr = 0;
2504 else if (*e == L'i')
2505 curr = 1;
2506 else if (*e == L'b')
2507 curr = 2;
2508 else
2509 curr = 0;
2510
2511 if (prev < 0)
2512 prev = curr;
2513
2514 if (curr != prev)
2515 break;
2516
2517 t++;
2518 if (e && *e)
2519 e++;
2520 }
2521
2522 /*
2523 * We've isolated a maximal subsequence of the line
2524 * which has the same emphasis. Form it into a word
2525 * structure.
2526 */
2527 w = snew(word);
2528 w->next = NULL;
2529 w->alt = NULL;
2530 w->type = (prev == 0 ? word_WeakCode :
2531 prev == 1 ? word_Emph : word_Normal);
2532 w->text = snewn(t-start+1, wchar_t);
2533 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
2534 w->text[t-start] = '\0';
2535 w->breaks = FALSE;
2536
2537 if (ltail)
2538 ltail->next = w;
2539 else
2540 lhead = w;
2541 ltail = w;
2542
2543 start = t;
2544 prev = -1;
2545 }
2546
2547 ldata = snew(line_data);
2548
2549 ldata->pdata = pdata;
2550 ldata->first = lhead;
2551 ldata->end = NULL;
2552 ldata->line_height = conf->fcode.font_size * UNITS_PER_PT;
2553
2554 ldata->xpos = indent;
2555
2556 if (pdata->last) {
2557 pdata->last->next = ldata;
2558 ldata->prev = pdata->last;
2559 } else {
2560 pdata->first = ldata;
2561 ldata->prev = NULL;
2562 }
2563 ldata->next = NULL;
2564 pdata->last = ldata;
2565
2566 ldata->hshortfall = 0;
2567 ldata->nspaces = 0;
2568 ldata->aux_text = NULL;
2569 ldata->aux_text_2 = NULL;
2570 ldata->aux_left_indent = 0;
2571 /* General opprobrium for breaking in a code paragraph. */
2572 ldata->penalty_before = ldata->penalty_after = 50000;
2573 }
2574
2575 standard_line_spacing(pdata, conf);
2576
2577 return pdata;
2578 }
2579
2580 static para_data *rule_paragraph(int indent, paper_conf *conf)
2581 {
2582 para_data *pdata = snew(para_data);
2583 line_data *ldata;
2584
2585 ldata = snew(line_data);
2586
2587 ldata->pdata = pdata;
2588 ldata->first = NULL;
2589 ldata->end = NULL;
2590 ldata->line_height = conf->rule_thickness;
2591
2592 ldata->xpos = indent;
2593
2594 ldata->prev = NULL;
2595 ldata->next = NULL;
2596
2597 ldata->hshortfall = 0;
2598 ldata->nspaces = 0;
2599 ldata->aux_text = NULL;
2600 ldata->aux_text_2 = NULL;
2601 ldata->aux_left_indent = 0;
2602
2603 /*
2604 * Better to break after a rule than before it
2605 */
2606 ldata->penalty_after += 100000;
2607 ldata->penalty_before += -100000;
2608
2609 pdata->first = pdata->last = ldata;
2610 pdata->outline_level = -1;
2611 pdata->rect_type = RECT_RULE;
2612 pdata->contents_entry = NULL;
2613 pdata->justification = LEFT;
2614
2615 standard_line_spacing(pdata, conf);
2616
2617 return pdata;
2618 }
2619
2620 /*
2621 * Plain-text-like formatting for outline titles.
2622 */
2623 static void paper_rdaddw(rdstring *rs, word *text) {
2624 for (; text; text = text->next) switch (text->type) {
2625 case word_HyperLink:
2626 case word_HyperEnd:
2627 case word_UpperXref:
2628 case word_LowerXref:
2629 case word_XrefEnd:
2630 case word_IndexRef:
2631 break;
2632
2633 case word_Normal:
2634 case word_Emph:
2635 case word_Code:
2636 case word_WeakCode:
2637 case word_WhiteSpace:
2638 case word_EmphSpace:
2639 case word_CodeSpace:
2640 case word_WkCodeSpace:
2641 case word_Quote:
2642 case word_EmphQuote:
2643 case word_CodeQuote:
2644 case word_WkCodeQuote:
2645 assert(text->type != word_CodeQuote &&
2646 text->type != word_WkCodeQuote);
2647 if (towordstyle(text->type) == word_Emph &&
2648 (attraux(text->aux) == attr_First ||
2649 attraux(text->aux) == attr_Only))
2650 rdadd(rs, L'_'); /* FIXME: configurability */
2651 else if (towordstyle(text->type) == word_Code &&
2652 (attraux(text->aux) == attr_First ||
2653 attraux(text->aux) == attr_Only))
2654 rdadd(rs, L'\''); /* FIXME: configurability */
2655 if (removeattr(text->type) == word_Normal) {
2656 rdadds(rs, text->text);
2657 } else if (removeattr(text->type) == word_WhiteSpace) {
2658 rdadd(rs, L' ');
2659 } else if (removeattr(text->type) == word_Quote) {
2660 rdadd(rs, L'\''); /* fixme: configurability */
2661 }
2662 if (towordstyle(text->type) == word_Emph &&
2663 (attraux(text->aux) == attr_Last ||
2664 attraux(text->aux) == attr_Only))
2665 rdadd(rs, L'_'); /* FIXME: configurability */
2666 else if (towordstyle(text->type) == word_Code &&
2667 (attraux(text->aux) == attr_Last ||
2668 attraux(text->aux) == attr_Only))
2669 rdadd(rs, L'\''); /* FIXME: configurability */
2670 break;
2671 }
2672 }
2673
2674 static wchar_t *prepare_outline_title(word *first, wchar_t *separator,
2675 word *second)
2676 {
2677 rdstring rs = {0, 0, NULL};
2678
2679 if (first)
2680 paper_rdaddw(&rs, first);
2681 if (separator)
2682 rdadds(&rs, separator);
2683 if (second)
2684 paper_rdaddw(&rs, second);
2685
2686 return rs.text;
2687 }
2688
2689 static word *fake_word(wchar_t *text)
2690 {
2691 word *ret = snew(word);
2692 ret->next = NULL;
2693 ret->alt = NULL;
2694 ret->type = word_Normal;
2695 ret->text = ustrdup(text);
2696 ret->breaks = FALSE;
2697 ret->aux = 0;
2698 return ret;
2699 }
2700
2701 static word *fake_space_word(void)
2702 {
2703 word *ret = snew(word);
2704 ret->next = NULL;
2705 ret->alt = NULL;
2706 ret->type = word_WhiteSpace;
2707 ret->text = NULL;
2708 ret->breaks = TRUE;
2709 ret->aux = 0;
2710 return ret;
2711 }
2712
2713 static word *fake_page_ref(page_data *page)
2714 {
2715 word *ret = snew(word);
2716 ret->next = NULL;
2717 ret->alt = NULL;
2718 ret->type = word_PageXref;
2719 ret->text = NULL;
2720 ret->breaks = FALSE;
2721 ret->aux = 0;
2722 ret->private_data = page;
2723 return ret;
2724 }
2725
2726 static word *fake_end_ref(void)
2727 {
2728 word *ret = snew(word);
2729 ret->next = NULL;
2730 ret->alt = NULL;
2731 ret->type = word_XrefEnd;
2732 ret->text = NULL;
2733 ret->breaks = FALSE;
2734 ret->aux = 0;
2735 return ret;
2736 }
2737
2738 static word *prepare_contents_title(word *first, wchar_t *separator,
2739 word *second)
2740 {
2741 word *ret;
2742 word **wptr, *w;
2743
2744 wptr = &ret;
2745
2746 if (first) {
2747 w = dup_word_list(first);
2748 *wptr = w;
2749 while (w->next)
2750 w = w->next;
2751 wptr = &w->next;
2752 }
2753
2754 if (separator) {
2755 w = fake_word(separator);
2756 *wptr = w;
2757 wptr = &w->next;
2758 }
2759
2760 if (second) {
2761 *wptr = dup_word_list(second);
2762 }
2763
2764 return ret;
2765 }
2766
2767 static void fold_into_page(page_data *dest, page_data *src, int right_shift)
2768 {
2769 line_data *ldata;
2770
2771 if (!src->first_line)
2772 return;
2773
2774 if (dest->last_line) {
2775 dest->last_line->next = src->first_line;
2776 src->first_line->prev = dest->last_line;
2777 }
2778 dest->last_line = src->last_line;
2779
2780 for (ldata = src->first_line; ldata; ldata = ldata->next) {
2781 ldata->page = dest;
2782 ldata->xpos += right_shift;
2783
2784 if (ldata == src->last_line)
2785 break;
2786 }
2787 }