Add a mechanism for disabling ligature substitution for an entire paragraph
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * TODO in future work:
14 *
15 * - linearised PDF, perhaps?
16 *
17 * - I'm uncertain of whether I need to include a ToUnicode CMap
18 * in each of my font definitions in PDF. Currently things (by
19 * which I mean cut and paste out of acroread) seem to be
20 * working fairly happily without it, but I don't know.
21 *
22 * - rather than the ugly aux_text mechanism for rendering chapter
23 * titles, we could actually build the correct word list and
24 * wrap it as a whole.
25 *
26 * - get vertical font metrics and use them to position the PDF
27 * xref boxes more pleasantly
28 *
29 * - configurability
30 * * page header and footer should be configurable; we should
31 * be able to shift the page number elsewhere, and add other
32 * things such as the current chapter/section title and fixed
33 * text
34 * * remove the fixed mapping from heading levels to heading
35 * styles; offer a menu of styles from which the user can
36 * choose at every heading level
37 * * first-line indent in paragraphs
38 * * fixed text: `Contents', `Index', the colon-space and full
39 * stop in chapter title constructions
40 * * configurable location of contents?
41 * * certainly configurably _remove_ the contents, and possibly
42 * also the index
43 * * double-sided document switch?
44 * + means you have two header/footer formats which
45 * alternate
46 * + and means that mandatory page breaks before chapter
47 * titles should include a blank page if necessary to
48 * start the next section on a right-hand page
49 *
50 * - title pages
51 *
52 * - ability to import other Type 1 fonts
53 * * we need to parse the font to extract its metrics
54 * * then we pass the font bodily to both PS and PDF so it can
55 * be included in the output file
56 *
57 * - character substitution for better typography?
58 * * fi, fl, ffi, ffl ligatures
59 * * use real ellipsis rather than ...
60 * * a hyphen in a word by itself might prefer to be an en-dash
61 * * (Americans might even want a convenient way to use an
62 * em-dash)
63 * * DON'T DO ANY OF THE ABOVE WITHIN \c OR \cw!
64 * * substituting `minus' for `hyphen' in the standard encoding
65 * is probably preferable in Courier, though certainly not in
66 * the main text font
67 * * if I do do this lot, I'm rather inclined to at least try
68 * to think up a configurable way to do it so that Americans
69 * can do em-dash tricks without my intervention and other
70 * people can do other odd things too.
71 */
72
73 #include <assert.h>
74 #include <stdio.h>
75 #include <stdarg.h>
76 #include <stdlib.h>
77
78 #include "halibut.h"
79 #include "paper.h"
80
81 typedef struct paper_conf_Tag paper_conf;
82 typedef struct paper_idx_Tag paper_idx;
83
84 typedef struct {
85 font_data *fonts[NFONTS];
86 int font_size;
87 } font_cfg;
88
89 struct paper_conf_Tag {
90 int paper_width;
91 int paper_height;
92 int left_margin;
93 int top_margin;
94 int right_margin;
95 int bottom_margin;
96 int indent_list_bullet;
97 int indent_list_after;
98 int indent_list;
99 int indent_quote;
100 int base_leading;
101 int base_para_spacing;
102 int chapter_top_space;
103 int sect_num_left_space;
104 int chapter_underline_depth;
105 int chapter_underline_thickness;
106 int rule_thickness;
107 font_cfg fbase, fcode, ftitle, fchapter, *fsect;
108 int nfsect;
109 int contents_indent_step;
110 int contents_margin;
111 int leader_separation;
112 int index_gutter;
113 int index_cols;
114 int index_minsep;
115 int pagenum_fontsize;
116 int footer_distance;
117 wchar_t *lquote, *rquote, *bullet;
118 wchar_t *contents_text, *index_text;
119 /* These are derived from the above */
120 int base_width;
121 int page_height;
122 int index_colwidth;
123 };
124
125 struct paper_idx_Tag {
126 /*
127 * Word list giving the page numbers on which this index entry
128 * appears. Also the last word in the list, for ease of
129 * construction.
130 */
131 word *words;
132 word *lastword;
133 /*
134 * The last page added to the list (so we can ensure we don't
135 * add one twice).
136 */
137 page_data *lastpage;
138 };
139
140 enum {
141 word_PageXref = word_NotWordType + 1
142 };
143
144 /* Flags for render_string() */
145 #define RS_NOLIG 1
146
147 static font_data *make_std_font(font_list *fontlist, char const *name);
148 static void wrap_paragraph(para_data *pdata, word *words,
149 int w, int i1, int i2, paper_conf *conf);
150 static page_data *page_breaks(line_data *first, line_data *last,
151 int page_height, int ncols, int headspace);
152 static int render_string(page_data *page, font_data *font, int fontsize,
153 int x, int y, wchar_t *str, unsigned flags);
154 static int render_line(line_data *ldata, int left_x, int top_y,
155 xref_dest *dest, keywordlist *keywords, indexdata *idx,
156 paper_conf *conf);
157 static void render_para(para_data *pdata, paper_conf *conf,
158 keywordlist *keywords, indexdata *idx,
159 paragraph *index_placeholder, page_data *index_page);
160 static int string_width(font_data *font, wchar_t const *string, int *errs,
161 unsigned flags);
162 static int paper_width_simple(para_data *pdata, word *text, paper_conf *conf);
163 static para_data *code_paragraph(int indent, word *words, paper_conf *conf);
164 static para_data *rule_paragraph(int indent, paper_conf *conf);
165 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
166 static para_data *make_para_data(int ptype, int paux, int indent, int rmargin,
167 word *pkwtext, word *pkwtext2, word *pwords,
168 paper_conf *conf);
169 static void standard_line_spacing(para_data *pdata, paper_conf *conf);
170 static wchar_t *prepare_outline_title(word *first, wchar_t *separator,
171 word *second);
172 static word *fake_word(wchar_t *text);
173 static word *fake_space_word(void);
174 static word *fake_page_ref(page_data *page);
175 static word *fake_end_ref(void);
176 static word *prepare_contents_title(word *first, wchar_t *separator,
177 word *second);
178 static void fold_into_page(page_data *dest, page_data *src, int right_shift);
179
180 static int fonts_ok(wchar_t *string, ...)
181 {
182 font_data *font;
183 va_list ap;
184 int ret = TRUE;
185
186 va_start(ap, string);
187 while ( (font = va_arg(ap, font_data *)) != NULL) {
188 int errs;
189 (void) string_width(font, string, &errs, 0);
190 if (errs) {
191 ret = FALSE;
192 break;
193 }
194 }
195 va_end(ap);
196
197 return ret;
198 }
199
200 static void paper_cfg_fonts(font_data **fonts, font_list *fontlist,
201 wchar_t *wp, filepos *fpos) {
202 font_data *f;
203 char *fn;
204 int i;
205
206 for (i = 0; i < NFONTS && *wp; i++, wp = uadv(wp)) {
207 fn = utoa_dup(wp, CS_ASCII);
208 f = make_std_font(fontlist, fn);
209 if (f)
210 fonts[i] = f;
211 else
212 /* FIXME: proper error */
213 error(err_nofont, fpos, wp);
214 }
215 }
216
217 static paper_conf paper_configure(paragraph *source, font_list *fontlist) {
218 paragraph *p;
219 paper_conf ret;
220
221 /*
222 * Defaults.
223 */
224 ret.paper_width = 595 * UNITS_PER_PT;
225 ret.paper_height = 842 * UNITS_PER_PT;
226 ret.left_margin = 72 * UNITS_PER_PT;
227 ret.top_margin = 72 * UNITS_PER_PT;
228 ret.right_margin = 72 * UNITS_PER_PT;
229 ret.bottom_margin = 108 * UNITS_PER_PT;
230 ret.indent_list_bullet = 6 * UNITS_PER_PT;
231 ret.indent_list_after = 18 * UNITS_PER_PT;
232 ret.indent_quote = 18 * UNITS_PER_PT;
233 ret.base_leading = UNITS_PER_PT;
234 ret.base_para_spacing = 10 * UNITS_PER_PT;
235 ret.chapter_top_space = 72 * UNITS_PER_PT;
236 ret.sect_num_left_space = 12 * UNITS_PER_PT;
237 ret.chapter_underline_depth = 14 * UNITS_PER_PT;
238 ret.chapter_underline_thickness = 3 * UNITS_PER_PT;
239 ret.rule_thickness = 1 * UNITS_PER_PT;
240 ret.fbase.font_size = 12;
241 ret.fbase.fonts[FONT_NORMAL] = make_std_font(fontlist, "Times-Roman");
242 ret.fbase.fonts[FONT_EMPH] = make_std_font(fontlist, "Times-Italic");
243 ret.fbase.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
244 ret.fcode.font_size = 12;
245 ret.fcode.fonts[FONT_NORMAL] = make_std_font(fontlist, "Courier-Bold");
246 ret.fcode.fonts[FONT_EMPH] = make_std_font(fontlist, "Courier-Oblique");
247 ret.fcode.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
248 ret.ftitle.font_size = 24;
249 ret.ftitle.fonts[FONT_NORMAL] = make_std_font(fontlist, "Helvetica-Bold");
250 ret.ftitle.fonts[FONT_EMPH] =
251 make_std_font(fontlist, "Helvetica-BoldOblique");
252 ret.ftitle.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
253 ret.fchapter.font_size = 20;
254 ret.fchapter.fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
255 ret.fchapter.fonts[FONT_EMPH] =
256 make_std_font(fontlist, "Helvetica-BoldOblique");
257 ret.fchapter.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
258 ret.nfsect = 3;
259 ret.fsect = snewn(ret.nfsect, font_cfg);
260 ret.fsect[0].font_size = 16;
261 ret.fsect[0].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
262 ret.fsect[0].fonts[FONT_EMPH] =
263 make_std_font(fontlist, "Helvetica-BoldOblique");
264 ret.fsect[0].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
265 ret.fsect[1].font_size = 14;
266 ret.fsect[1].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
267 ret.fsect[1].fonts[FONT_EMPH] =
268 make_std_font(fontlist, "Helvetica-BoldOblique");
269 ret.fsect[1].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
270 ret.fsect[2].font_size = 13;
271 ret.fsect[2].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
272 ret.fsect[2].fonts[FONT_EMPH] =
273 make_std_font(fontlist, "Helvetica-BoldOblique");
274 ret.fsect[2].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
275 ret.contents_indent_step = 24 * UNITS_PER_PT;
276 ret.contents_margin = 84 * UNITS_PER_PT;
277 ret.leader_separation = 12 * UNITS_PER_PT;
278 ret.index_gutter = 36 * UNITS_PER_PT;
279 ret.index_cols = 2;
280 ret.index_minsep = 18 * UNITS_PER_PT;
281 ret.pagenum_fontsize = 12;
282 ret.footer_distance = 32 * UNITS_PER_PT;
283 ret.lquote = L"\x2018\0\x2019\0'\0'\0\0";
284 ret.rquote = uadv(ret.lquote);
285 ret.bullet = L"\x2022\0-\0\0";
286 ret.contents_text = L"Contents";
287 ret.index_text = L"Index";
288
289 /*
290 * Two-pass configuration so that we can pick up global config
291 * (e.g. `quotes') before having it overridden by specific
292 * config (`paper-quotes'), irrespective of the order in which
293 * they occur.
294 */
295 for (p = source; p; p = p->next) {
296 if (p->type == para_Config) {
297 if (!ustricmp(p->keyword, L"quotes")) {
298 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
299 ret.lquote = uadv(p->keyword);
300 ret.rquote = uadv(ret.lquote);
301 }
302 }
303 }
304 }
305
306 for (p = source; p; p = p->next) {
307 p->private_data = NULL;
308 if (p->type == para_Config) {
309 if (!ustricmp(p->keyword, L"paper-quotes")) {
310 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
311 ret.lquote = uadv(p->keyword);
312 ret.rquote = uadv(ret.lquote);
313 }
314 } else if (!ustricmp(p->keyword, L"contents")) {
315 ret.contents_text = uadv(p->keyword);
316 } else if (!ustricmp(p->keyword, L"index")) {
317 ret.index_text = uadv(p->keyword);
318 } else if (!ustricmp(p->keyword, L"paper-bullet")) {
319 ret.bullet = uadv(p->keyword);
320 } else if (!ustricmp(p->keyword, L"paper-page-width")) {
321 ret.paper_width =
322 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
323 } else if (!ustricmp(p->keyword, L"paper-page-height")) {
324 ret.paper_height =
325 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
326 } else if (!ustricmp(p->keyword, L"paper-left-margin")) {
327 ret.left_margin =
328 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
329 } else if (!ustricmp(p->keyword, L"paper-top-margin")) {
330 ret.top_margin =
331 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
332 } else if (!ustricmp(p->keyword, L"paper-right-margin")) {
333 ret.right_margin =
334 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
335 } else if (!ustricmp(p->keyword, L"paper-bottom-margin")) {
336 ret.bottom_margin =
337 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
338 } else if (!ustricmp(p->keyword, L"paper-list-indent")) {
339 ret.indent_list_bullet =
340 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
341 } else if (!ustricmp(p->keyword, L"paper-listitem-indent")) {
342 ret.indent_list =
343 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
344 } else if (!ustricmp(p->keyword, L"paper-quote-indent")) {
345 ret.indent_quote =
346 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
347 } else if (!ustricmp(p->keyword, L"paper-base-leading")) {
348 ret.base_leading =
349 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
350 } else if (!ustricmp(p->keyword, L"paper-base-para-spacing")) {
351 ret.base_para_spacing =
352 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
353 } else if (!ustricmp(p->keyword, L"paper-chapter-top-space")) {
354 ret.chapter_top_space =
355 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
356 } else if (!ustricmp(p->keyword, L"paper-sect-num-left-space")) {
357 ret.sect_num_left_space =
358 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
359 } else if (!ustricmp(p->keyword, L"paper-chapter-underline-depth")) {
360 ret.chapter_underline_depth =
361 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
362 } else if (!ustricmp(p->keyword, L"paper-chapter-underline-thickness")) {
363 ret.chapter_underline_thickness =
364 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
365 } else if (!ustricmp(p->keyword, L"paper-rule-thickness")) {
366 ret.rule_thickness =
367 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
368 } else if (!ustricmp(p->keyword, L"paper-contents-indent-step")) {
369 ret.contents_indent_step =
370 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
371 } else if (!ustricmp(p->keyword, L"paper-contents-margin")) {
372 ret.contents_margin =
373 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
374 } else if (!ustricmp(p->keyword, L"paper-leader-separation")) {
375 ret.leader_separation =
376 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
377 } else if (!ustricmp(p->keyword, L"paper-index-gutter")) {
378 ret.index_gutter =
379 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
380 } else if (!ustricmp(p->keyword, L"paper-index-minsep")) {
381 ret.index_minsep =
382 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
383 } else if (!ustricmp(p->keyword, L"paper-footer-distance")) {
384 ret.footer_distance =
385 (int) 0.5 + FUNITS_PER_PT * utof(uadv(p->keyword));
386 } else if (!ustricmp(p->keyword, L"paper-base-font-size")) {
387 ret.fbase.font_size = utoi(uadv(p->keyword));
388 } else if (!ustricmp(p->keyword, L"paper-index-columns")) {
389 ret.index_cols = utoi(uadv(p->keyword));
390 } else if (!ustricmp(p->keyword, L"paper-pagenum-font-size")) {
391 ret.pagenum_fontsize = utoi(uadv(p->keyword));
392 } else if (!ustricmp(p->keyword, L"paper-base-fonts")) {
393 paper_cfg_fonts(ret.fbase.fonts, fontlist, uadv(p->keyword),
394 &p->fpos);
395 } else if (!ustricmp(p->keyword, L"paper-code-font-size")) {
396 ret.fcode.font_size = utoi(uadv(p->keyword));
397 } else if (!ustricmp(p->keyword, L"paper-code-fonts")) {
398 paper_cfg_fonts(ret.fcode.fonts, fontlist, uadv(p->keyword),
399 &p->fpos);
400 } else if (!ustricmp(p->keyword, L"paper-title-font-size")) {
401 ret.ftitle.font_size = utoi(uadv(p->keyword));
402 } else if (!ustricmp(p->keyword, L"paper-title-fonts")) {
403 paper_cfg_fonts(ret.ftitle.fonts, fontlist, uadv(p->keyword),
404 &p->fpos);
405 } else if (!ustricmp(p->keyword, L"paper-chapter-font-size")) {
406 ret.fchapter.font_size = utoi(uadv(p->keyword));
407 } else if (!ustricmp(p->keyword, L"paper-chapter-fonts")) {
408 paper_cfg_fonts(ret.fchapter.fonts, fontlist, uadv(p->keyword),
409 &p->fpos);
410 } else if (!ustricmp(p->keyword, L"paper-section-font-size")) {
411 wchar_t *q = uadv(p->keyword);
412 int n = 0;
413 if (uisdigit(*q)) {
414 n = utoi(q);
415 q = uadv(q);
416 }
417 if (n >= ret.nfsect) {
418 int i;
419 ret.fsect = sresize(ret.fsect, n+1, font_cfg);
420 for (i = ret.nfsect; i <= n; i++)
421 ret.fsect[i] = ret.fsect[ret.nfsect-1];
422 ret.nfsect = n+1;
423 }
424 ret.fsect[n].font_size = utoi(q);
425 } else if (!ustricmp(p->keyword, L"paper-section-fonts")) {
426 wchar_t *q = uadv(p->keyword);
427 int n = 0;
428 if (uisdigit(*q)) {
429 n = utoi(q);
430 q = uadv(q);
431 }
432 if (n >= ret.nfsect) {
433 int i;
434 ret.fsect = sresize(ret.fsect, n+1, font_cfg);
435 for (i = ret.nfsect; i <= n; i++)
436 ret.fsect[i] = ret.fsect[ret.nfsect-1];
437 ret.nfsect = n+1;
438 }
439 paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos);
440 }
441 }
442 }
443
444 /*
445 * Set up the derived fields in the conf structure.
446 */
447
448 ret.base_width =
449 ret.paper_width - ret.left_margin - ret.right_margin;
450 ret.page_height =
451 ret.paper_height - ret.top_margin - ret.bottom_margin;
452 ret.indent_list = ret.indent_list_bullet + ret.indent_list_after;
453 ret.index_colwidth =
454 (ret.base_width - (ret.index_cols-1) * ret.index_gutter)
455 / ret.index_cols;
456
457 /*
458 * Now process fallbacks on quote characters and bullets. We
459 * use string_width() to determine whether all of the relevant
460 * fonts contain the same character, and fall back whenever we
461 * find a character which not all of them support.
462 */
463
464 /* Quote characters need not be supported in the fixed code fonts,
465 * but must be in the title and body fonts. */
466 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote))) {
467 int n;
468 if (fonts_ok(ret.lquote,
469 ret.fbase.fonts[FONT_NORMAL],
470 ret.fbase.fonts[FONT_EMPH],
471 ret.ftitle.fonts[FONT_NORMAL],
472 ret.ftitle.fonts[FONT_EMPH],
473 ret.fchapter.fonts[FONT_NORMAL],
474 ret.fchapter.fonts[FONT_EMPH], NULL) &&
475 fonts_ok(ret.rquote,
476 ret.fbase.fonts[FONT_NORMAL],
477 ret.fbase.fonts[FONT_EMPH],
478 ret.ftitle.fonts[FONT_NORMAL],
479 ret.ftitle.fonts[FONT_EMPH],
480 ret.fchapter.fonts[FONT_NORMAL],
481 ret.fchapter.fonts[FONT_EMPH], NULL)) {
482 for (n = 0; n < ret.nfsect; n++)
483 if (!fonts_ok(ret.lquote,
484 ret.fsect[n].fonts[FONT_NORMAL],
485 ret.fsect[n].fonts[FONT_EMPH], NULL) ||
486 !fonts_ok(ret.rquote,
487 ret.fsect[n].fonts[FONT_NORMAL],
488 ret.fsect[n].fonts[FONT_EMPH], NULL))
489 break;
490 if (n == ret.nfsect)
491 break;
492 }
493 ret.lquote = uadv(ret.rquote);
494 ret.rquote = uadv(ret.lquote);
495 }
496
497 /* The bullet character only needs to be supported in the normal body
498 * font (not even in italics). */
499 while (*ret.bullet && *uadv(ret.bullet) &&
500 !fonts_ok(ret.bullet, ret.fbase.fonts[FONT_NORMAL], NULL))
501 ret.bullet = uadv(ret.bullet);
502
503 return ret;
504 }
505
506 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
507 indexdata *idx) {
508 paragraph *p;
509 document *doc;
510 int indent, used_contents;
511 para_data *pdata, *firstpara = NULL, *lastpara = NULL;
512 para_data *firstcont, *lastcont;
513 line_data *firstline, *lastline, *firstcontline, *lastcontline;
514 page_data *pages;
515 font_list *fontlist;
516 paper_conf *conf, ourconf;
517 int has_index;
518 int pagenum;
519 paragraph index_placeholder_para;
520 page_data *first_index_page;
521
522 init_std_fonts();
523 fontlist = snew(font_list);
524 fontlist->head = fontlist->tail = NULL;
525
526 ourconf = paper_configure(sourceform, fontlist);
527 conf = &ourconf;
528
529 /*
530 * Set up a data structure to collect page numbers for each
531 * index entry.
532 */
533 {
534 int i;
535 indexentry *entry;
536
537 has_index = FALSE;
538
539 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
540 paper_idx *pi = snew(paper_idx);
541
542 has_index = TRUE;
543
544 pi->words = pi->lastword = NULL;
545 pi->lastpage = NULL;
546
547 entry->backend_data = pi;
548 }
549 }
550
551 /*
552 * Format the contents entry for each heading.
553 */
554 {
555 word *contents_title;
556 contents_title = fake_word(conf->contents_text);
557
558 firstcont = make_para_data(para_UnnumberedChapter, 0, 0, 0,
559 NULL, NULL, contents_title, conf);
560 lastcont = firstcont;
561 lastcont->next = NULL;
562 firstcontline = firstcont->first;
563 lastcontline = lastcont->last;
564 for (p = sourceform; p; p = p->next) {
565 word *words;
566 int indent;
567
568 switch (p->type) {
569 case para_Chapter:
570 case para_Appendix:
571 case para_UnnumberedChapter:
572 case para_Heading:
573 case para_Subsect:
574 switch (p->type) {
575 case para_Chapter:
576 case para_Appendix:
577 words = prepare_contents_title(p->kwtext, L": ", p->words);
578 indent = 0;
579 break;
580 case para_UnnumberedChapter:
581 words = prepare_contents_title(NULL, NULL, p->words);
582 indent = 0;
583 break;
584 case para_Heading:
585 case para_Subsect:
586 words = prepare_contents_title(p->kwtext2, L" ", p->words);
587 indent = (p->aux + 1) * conf->contents_indent_step;
588 break;
589 }
590 pdata = make_para_data(para_Normal, p->aux, indent,
591 conf->contents_margin,
592 NULL, NULL, words, conf);
593 pdata->next = NULL;
594 pdata->contents_entry = p;
595 lastcont->next = pdata;
596 lastcont = pdata;
597
598 /*
599 * Link all contents line structures together into
600 * a big list.
601 */
602 if (pdata->first) {
603 if (lastcontline) {
604 lastcontline->next = pdata->first;
605 pdata->first->prev = lastcontline;
606 } else {
607 firstcontline = pdata->first;
608 pdata->first->prev = NULL;
609 }
610 lastcontline = pdata->last;
611 lastcontline->next = NULL;
612 }
613
614 break;
615 }
616 }
617
618 /*
619 * And one extra one, for the index.
620 */
621 if (has_index) {
622 pdata = make_para_data(para_Normal, 0, 0,
623 conf->contents_margin,
624 NULL, NULL,
625 fake_word(conf->index_text), conf);
626 pdata->next = NULL;
627 pdata->contents_entry = &index_placeholder_para;
628 lastcont->next = pdata;
629 lastcont = pdata;
630
631 if (pdata->first) {
632 if (lastcontline) {
633 lastcontline->next = pdata->first;
634 pdata->first->prev = lastcontline;
635 } else {
636 firstcontline = pdata->first;
637 pdata->first->prev = NULL;
638 }
639 lastcontline = pdata->last;
640 lastcontline->next = NULL;
641 }
642 }
643 }
644
645 /*
646 * Do the main paragraph formatting.
647 */
648 indent = 0;
649 used_contents = FALSE;
650 firstline = lastline = NULL;
651 for (p = sourceform; p; p = p->next) {
652 p->private_data = NULL;
653
654 switch (p->type) {
655 /*
656 * These paragraph types are either invisible or don't
657 * define text in the normal sense. Either way, they
658 * don't require wrapping.
659 */
660 case para_IM:
661 case para_BR:
662 case para_Biblio:
663 case para_NotParaType:
664 case para_Config:
665 case para_VersionID:
666 case para_NoCite:
667 break;
668
669 /*
670 * These paragraph types don't require wrapping, but
671 * they do affect the line width to which we wrap the
672 * rest of the paragraphs, so we need to pay attention.
673 */
674 case para_LcontPush:
675 indent += conf->indent_list; break;
676 case para_LcontPop:
677 indent -= conf->indent_list; assert(indent >= 0); break;
678 case para_QuotePush:
679 indent += conf->indent_quote; break;
680 case para_QuotePop:
681 indent -= conf->indent_quote; assert(indent >= 0); break;
682
683 /*
684 * This paragraph type is special. Process it
685 * specially.
686 */
687 case para_Code:
688 pdata = code_paragraph(indent, p->words, conf);
689 p->private_data = pdata;
690 if (pdata->first != pdata->last) {
691 pdata->first->penalty_after += 100000;
692 pdata->last->penalty_before += 100000;
693 }
694 break;
695
696 /*
697 * This paragraph is also special.
698 */
699 case para_Rule:
700 pdata = rule_paragraph(indent, conf);
701 p->private_data = pdata;
702 break;
703
704 /*
705 * All of these paragraph types require wrapping in the
706 * ordinary way. So we must supply a set of fonts, a
707 * line width and auxiliary information (e.g. bullet
708 * text) for each one.
709 */
710 case para_Chapter:
711 case para_Appendix:
712 case para_UnnumberedChapter:
713 case para_Heading:
714 case para_Subsect:
715 case para_Normal:
716 case para_BiblioCited:
717 case para_Bullet:
718 case para_NumberedList:
719 case para_DescribedThing:
720 case para_Description:
721 case para_Copyright:
722 case para_Title:
723 pdata = make_para_data(p->type, p->aux, indent, 0,
724 p->kwtext, p->kwtext2, p->words, conf);
725
726 p->private_data = pdata;
727
728 break;
729 }
730
731 if (p->private_data) {
732 pdata = (para_data *)p->private_data;
733
734 /*
735 * If this is the first non-title heading, we link the
736 * contents section in before it.
737 */
738 if (!used_contents && pdata->outline_level > 0) {
739 used_contents = TRUE;
740 if (lastpara)
741 lastpara->next = firstcont;
742 else
743 firstpara = firstcont;
744 lastpara = lastcont;
745 assert(lastpara->next == NULL);
746
747 if (lastline) {
748 lastline->next = firstcontline;
749 firstcontline->prev = lastline;
750 } else {
751 firstline = firstcontline;
752 firstcontline->prev = NULL;
753 }
754 assert(lastcontline != NULL);
755 lastline = lastcontline;
756 lastline->next = NULL;
757 }
758
759 /*
760 * Link all line structures together into a big list.
761 */
762 if (pdata->first) {
763 if (lastline) {
764 lastline->next = pdata->first;
765 pdata->first->prev = lastline;
766 } else {
767 firstline = pdata->first;
768 pdata->first->prev = NULL;
769 }
770 lastline = pdata->last;
771 lastline->next = NULL;
772 }
773
774 /*
775 * Link all paragraph structures together similarly.
776 */
777 pdata->next = NULL;
778 if (lastpara)
779 lastpara->next = pdata;
780 else
781 firstpara = pdata;
782 lastpara = pdata;
783 }
784 }
785
786 /*
787 * Now we have an enormous linked list of every line of text in
788 * the document. Break it up into pages.
789 */
790 pages = page_breaks(firstline, lastline, conf->page_height, 0, 0);
791
792 /*
793 * Number the pages.
794 */
795 {
796 char buf[40];
797 page_data *page;
798
799 pagenum = 0;
800
801 for (page = pages; page; page = page->next) {
802 sprintf(buf, "%d", ++pagenum);
803 page->number = ufroma_dup(buf, CS_ASCII);
804 }
805
806 if (has_index) {
807 first_index_page = snew(page_data);
808 first_index_page->next = first_index_page->prev = NULL;
809 first_index_page->first_line = NULL;
810 first_index_page->last_line = NULL;
811 first_index_page->first_text = first_index_page->last_text = NULL;
812 first_index_page->first_xref = first_index_page->last_xref = NULL;
813 first_index_page->first_rect = first_index_page->last_rect = NULL;
814
815 /* And don't forget the as-yet-uncreated index. */
816 sprintf(buf, "%d", ++pagenum);
817 first_index_page->number = ufroma_dup(buf, CS_ASCII);
818 }
819 }
820
821 /*
822 * Now we're ready to actually lay out the pages. We do this by
823 * looping over _paragraphs_, since we may need to track cross-
824 * references between lines and even across pages.
825 */
826 for (pdata = firstpara; pdata; pdata = pdata->next)
827 render_para(pdata, conf, keywords, idx,
828 &index_placeholder_para, first_index_page);
829
830 /*
831 * Now we've laid out the main body pages, we should have
832 * acquired a full set of page numbers for the index.
833 */
834 if (has_index) {
835 int i;
836 indexentry *entry;
837 word *index_title;
838 para_data *firstidx, *lastidx;
839 line_data *firstidxline, *lastidxline, *ldata;
840 page_data *ipages, *ipages2, *page;
841
842 /*
843 * Create a set of paragraphs for the index.
844 */
845 index_title = fake_word(conf->index_text);
846
847 firstidx = make_para_data(para_UnnumberedChapter, 0, 0, 0,
848 NULL, NULL, index_title, conf);
849 lastidx = firstidx;
850 lastidx->next = NULL;
851 firstidxline = firstidx->first;
852 lastidxline = lastidx->last;
853 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
854 paper_idx *pi = (paper_idx *)entry->backend_data;
855 para_data *text, *pages;
856
857 if (!pi->words)
858 continue;
859
860 text = make_para_data(para_Normal, 0, 0,
861 conf->base_width - conf->index_colwidth,
862 NULL, NULL, entry->text, conf);
863
864 pages = make_para_data(para_Normal, 0, 0,
865 conf->base_width - conf->index_colwidth,
866 NULL, NULL, pi->words, conf);
867
868 text->justification = LEFT;
869 pages->justification = RIGHT;
870 text->last->space_after = pages->first->space_before =
871 conf->base_leading / 2;
872
873 pages->last->space_after = text->first->space_before =
874 conf->base_leading;
875
876 assert(text->first);
877 assert(pages->first);
878 assert(lastidxline);
879 assert(lastidx);
880
881 /*
882 * If feasible, fold the two halves of the index entry
883 * together.
884 */
885 if (text->last->real_shortfall + pages->first->real_shortfall >
886 conf->index_colwidth + conf->index_minsep) {
887 text->last->space_after = -1;
888 pages->first->space_before = -pages->first->line_height+1;
889 }
890
891 lastidx->next = text;
892 text->next = pages;
893 pages->next = NULL;
894 lastidx = pages;
895
896 /*
897 * Link all index line structures together into
898 * a big list.
899 */
900 text->last->next = pages->first;
901 pages->first->prev = text->last;
902
903 lastidxline->next = text->first;
904 text->first->prev = lastidxline;
905
906 lastidxline = pages->last;
907
908 /*
909 * Breaking an index entry anywhere is so bad that I
910 * think I'm going to forbid it totally.
911 */
912 for (ldata = text->first; ldata && ldata->next;
913 ldata = ldata->next) {
914 ldata->next->space_before += ldata->space_after + 1;
915 ldata->space_after = -1;
916 }
917 }
918
919 /*
920 * Now break the index into pages.
921 */
922 ipages = page_breaks(firstidxline, firstidxline, conf->page_height,
923 0, 0);
924 ipages2 = page_breaks(firstidxline->next, lastidxline,
925 conf->page_height,
926 conf->index_cols,
927 firstidxline->space_before +
928 firstidxline->line_height +
929 firstidxline->space_after);
930
931 /*
932 * This will have put each _column_ of the index on a
933 * separate page, which isn't what we want. Fold the pages
934 * back together.
935 */
936 page = ipages2;
937 while (page) {
938 int i;
939
940 for (i = 1; i < conf->index_cols; i++)
941 if (page->next) {
942 page_data *tpage;
943
944 fold_into_page(page, page->next,
945 i * (conf->index_colwidth +
946 conf->index_gutter));
947 tpage = page->next;
948 page->next = page->next->next;
949 if (page->next)
950 page->next->prev = page;
951 sfree(tpage);
952 }
953
954 page = page->next;
955 }
956 /* Also fold the heading on to the same page as the index items. */
957 fold_into_page(ipages, ipages2, 0);
958 ipages->next = ipages2->next;
959 if (ipages->next)
960 ipages->next->prev = ipages;
961 sfree(ipages2);
962 fold_into_page(first_index_page, ipages, 0);
963 first_index_page->next = ipages->next;
964 if (first_index_page->next)
965 first_index_page->next->prev = first_index_page;
966 sfree(ipages);
967 ipages = first_index_page;
968
969 /*
970 * Number the index pages, except the already-numbered
971 * first one.
972 */
973 for (page = ipages->next; page; page = page->next) {
974 char buf[40];
975 sprintf(buf, "%d", ++pagenum);
976 page->number = ufroma_dup(buf, CS_ASCII);
977 }
978
979 /*
980 * Render the index pages.
981 */
982 for (pdata = firstidx; pdata; pdata = pdata->next)
983 render_para(pdata, conf, keywords, idx,
984 &index_placeholder_para, first_index_page);
985
986 /*
987 * Link the index page list on to the end of the main page
988 * list.
989 */
990 if (!pages)
991 pages = ipages;
992 else {
993 for (page = pages; page->next; page = page->next);
994 page->next = ipages;
995 }
996
997 /*
998 * Same with the paragraph list, which will cause the index
999 * to be mentioned in the document outline.
1000 */
1001 if (!firstpara)
1002 firstpara = firstidx;
1003 else
1004 lastpara->next = firstidx;
1005 lastpara = lastidx;
1006 }
1007
1008 /*
1009 * Draw the headers and footers.
1010 *
1011 * FIXME: this should be fully configurable, but for the moment
1012 * I'm just going to put in page numbers in the centre of a
1013 * footer and leave it at that.
1014 */
1015 {
1016 page_data *page;
1017
1018 for (page = pages; page; page = page->next) {
1019 int width;
1020
1021 width = conf->pagenum_fontsize *
1022 string_width(conf->fbase.fonts[FONT_NORMAL], page->number,
1023 NULL, 0);
1024
1025 render_string(page, conf->fbase.fonts[FONT_NORMAL],
1026 conf->pagenum_fontsize,
1027 conf->left_margin + (conf->base_width - width)/2,
1028 conf->bottom_margin - conf->footer_distance,
1029 page->number, 0);
1030 }
1031 }
1032
1033 /*
1034 * Start putting together the overall document structure we're
1035 * going to return.
1036 */
1037 doc = snew(document);
1038 doc->fonts = fontlist;
1039 doc->pages = pages;
1040 doc->paper_width = conf->paper_width;
1041 doc->paper_height = conf->paper_height;
1042
1043 /*
1044 * Collect the section heading paragraphs into a document
1045 * outline. This is slightly fiddly because the Title paragraph
1046 * isn't required to be at the start, although all the others
1047 * must be in order.
1048 */
1049 {
1050 int osize = 20;
1051
1052 doc->outline_elements = snewn(osize, outline_element);
1053 doc->n_outline_elements = 0;
1054
1055 /* First find the title. */
1056 for (pdata = firstpara; pdata; pdata = pdata->next) {
1057 if (pdata->outline_level == 0) {
1058 doc->outline_elements[0].level = 0;
1059 doc->outline_elements[0].pdata = pdata;
1060 doc->n_outline_elements++;
1061 break;
1062 }
1063 }
1064
1065 /* Then collect the rest. */
1066 for (pdata = firstpara; pdata; pdata = pdata->next) {
1067 if (pdata->outline_level > 0) {
1068 if (doc->n_outline_elements >= osize) {
1069 osize += 20;
1070 doc->outline_elements =
1071 sresize(doc->outline_elements, osize, outline_element);
1072 }
1073
1074 doc->outline_elements[doc->n_outline_elements].level =
1075 pdata->outline_level;
1076 doc->outline_elements[doc->n_outline_elements].pdata = pdata;
1077 doc->n_outline_elements++;
1078 }
1079 }
1080 }
1081
1082 return doc;
1083 }
1084
1085 static void setfont(para_data *p, font_cfg *f) {
1086 int i;
1087
1088 for (i = 0; i < NFONTS; i++) {
1089 p->fonts[i] = f->fonts[i];
1090 p->sizes[i] = f->font_size;
1091 }
1092 }
1093
1094 static para_data *make_para_data(int ptype, int paux, int indent, int rmargin,
1095 word *pkwtext, word *pkwtext2, word *pwords,
1096 paper_conf *conf)
1097 {
1098 para_data *pdata;
1099 line_data *ldata;
1100 int extra_indent, firstline_indent, aux_indent;
1101 word *aux, *aux2;
1102
1103 pdata = snew(para_data);
1104 pdata->outline_level = -1;
1105 pdata->outline_title = NULL;
1106 pdata->rect_type = RECT_NONE;
1107 pdata->contents_entry = NULL;
1108 pdata->justification = JUST;
1109 pdata->extraflags = 0;
1110
1111 /*
1112 * Choose fonts for this paragraph.
1113 */
1114 switch (ptype) {
1115 case para_Title:
1116 setfont(pdata, &conf->ftitle);
1117 pdata->outline_level = 0;
1118 break;
1119
1120 case para_Chapter:
1121 case para_Appendix:
1122 case para_UnnumberedChapter:
1123 setfont(pdata, &conf->fchapter);
1124 pdata->outline_level = 1;
1125 break;
1126
1127 case para_Heading:
1128 case para_Subsect:
1129 setfont(pdata,
1130 &conf->fsect[paux >= conf->nfsect ? conf->nfsect - 1 : paux]);
1131 pdata->outline_level = 2 + paux;
1132 break;
1133
1134 case para_Normal:
1135 case para_BiblioCited:
1136 case para_Bullet:
1137 case para_NumberedList:
1138 case para_DescribedThing:
1139 case para_Description:
1140 case para_Copyright:
1141 setfont(pdata, &conf->fbase);
1142 break;
1143 }
1144
1145 /*
1146 * Also select an indentation level depending on the
1147 * paragraph type (list paragraphs other than
1148 * para_DescribedThing need extra indent).
1149 *
1150 * (FIXME: Perhaps at some point we might even arrange
1151 * for the user to be able to request indented first
1152 * lines in paragraphs.)
1153 */
1154 if (ptype == para_Bullet ||
1155 ptype == para_NumberedList ||
1156 ptype == para_Description) {
1157 extra_indent = firstline_indent = conf->indent_list;
1158 } else {
1159 extra_indent = firstline_indent = 0;
1160 }
1161
1162 /*
1163 * Find the auxiliary text for this paragraph.
1164 */
1165 aux = aux2 = NULL;
1166 aux_indent = 0;
1167
1168 switch (ptype) {
1169 case para_Chapter:
1170 case para_Appendix:
1171 case para_Heading:
1172 case para_Subsect:
1173 /*
1174 * For some heading styles (FIXME: be able to
1175 * configure which), the auxiliary text contains
1176 * the chapter number and is arranged to be
1177 * right-aligned a few points left of the primary
1178 * margin. For other styles, the auxiliary text is
1179 * the full chapter _name_ and takes up space
1180 * within the (wrapped) chapter title, meaning that
1181 * we must move the first line indent over to make
1182 * space for it.
1183 */
1184 if (ptype == para_Heading || ptype == para_Subsect) {
1185 int len;
1186
1187 aux = pkwtext2;
1188 len = paper_width_simple(pdata, pkwtext2, conf);
1189 aux_indent = -len - conf->sect_num_left_space;
1190
1191 pdata->outline_title =
1192 prepare_outline_title(pkwtext2, L" ", pwords);
1193 } else {
1194 aux = pkwtext;
1195 aux2 = fake_word(L": ");
1196 aux_indent = 0;
1197
1198 firstline_indent += paper_width_simple(pdata, aux, conf);
1199 firstline_indent += paper_width_simple(pdata, aux2, conf);
1200
1201 pdata->outline_title =
1202 prepare_outline_title(pkwtext, L": ", pwords);
1203 }
1204 break;
1205
1206 case para_Bullet:
1207 /*
1208 * Auxiliary text consisting of a bullet.
1209 */
1210 aux = fake_word(conf->bullet);
1211 aux_indent = indent + conf->indent_list_bullet;
1212 break;
1213
1214 case para_NumberedList:
1215 /*
1216 * Auxiliary text consisting of the number followed
1217 * by a (FIXME: configurable) full stop.
1218 */
1219 aux = pkwtext;
1220 aux2 = fake_word(L".");
1221 aux_indent = indent + conf->indent_list_bullet;
1222 break;
1223
1224 case para_BiblioCited:
1225 /*
1226 * Auxiliary text consisting of the bibliography
1227 * reference text, and a trailing space.
1228 */
1229 aux = pkwtext;
1230 aux2 = fake_word(L" ");
1231 aux_indent = indent;
1232 firstline_indent += paper_width_simple(pdata, aux, conf);
1233 firstline_indent += paper_width_simple(pdata, aux2, conf);
1234 break;
1235 }
1236
1237 if (pdata->outline_level >= 0 && !pdata->outline_title) {
1238 pdata->outline_title =
1239 prepare_outline_title(NULL, NULL, pwords);
1240 }
1241
1242 wrap_paragraph(pdata, pwords, conf->base_width - rmargin,
1243 indent + firstline_indent,
1244 indent + extra_indent, conf);
1245
1246 pdata->first->aux_text = aux;
1247 pdata->first->aux_text_2 = aux2;
1248 pdata->first->aux_left_indent = aux_indent;
1249
1250 /*
1251 * Line breaking penalties.
1252 */
1253 switch (ptype) {
1254 case para_Chapter:
1255 case para_Appendix:
1256 case para_Heading:
1257 case para_Subsect:
1258 case para_UnnumberedChapter:
1259 /*
1260 * Fixed and large penalty for breaking straight
1261 * after a heading; corresponding bonus for
1262 * breaking straight before.
1263 */
1264 pdata->first->penalty_before = -500000;
1265 pdata->last->penalty_after = 500000;
1266 for (ldata = pdata->first; ldata; ldata = ldata->next)
1267 ldata->penalty_after = 500000;
1268 break;
1269
1270 case para_DescribedThing:
1271 /*
1272 * This is treated a bit like a small heading:
1273 * there's a penalty for breaking after it (i.e.
1274 * between it and its description), and a bonus for
1275 * breaking before it (actually _between_ list
1276 * items).
1277 */
1278 pdata->first->penalty_before = -200000;
1279 pdata->last->penalty_after = 200000;
1280 break;
1281
1282 default:
1283 /*
1284 * Most paragraph types: widow/orphan control by
1285 * discouraging breaking one line from the end of
1286 * any paragraph.
1287 */
1288 if (pdata->first != pdata->last) {
1289 pdata->first->penalty_after = 100000;
1290 pdata->last->penalty_before = 100000;
1291 }
1292 break;
1293 }
1294
1295 standard_line_spacing(pdata, conf);
1296
1297 /*
1298 * Some kinds of section heading require a page break before
1299 * them and an underline after.
1300 */
1301 if (ptype == para_Title ||
1302 ptype == para_Chapter ||
1303 ptype == para_Appendix ||
1304 ptype == para_UnnumberedChapter) {
1305 pdata->first->page_break = TRUE;
1306 pdata->first->space_before = conf->chapter_top_space;
1307 pdata->last->space_after +=
1308 (conf->chapter_underline_depth +
1309 conf->chapter_underline_thickness);
1310 pdata->rect_type = RECT_CHAPTER_UNDERLINE;
1311 }
1312
1313 return pdata;
1314 }
1315
1316 static void standard_line_spacing(para_data *pdata, paper_conf *conf)
1317 {
1318 line_data *ldata;
1319
1320 /*
1321 * Set the line spacing for each line in this paragraph.
1322 */
1323 for (ldata = pdata->first; ldata; ldata = ldata->next) {
1324 if (ldata == pdata->first)
1325 ldata->space_before = conf->base_para_spacing / 2;
1326 else
1327 ldata->space_before = conf->base_leading / 2;
1328 if (ldata == pdata->last)
1329 ldata->space_after = conf->base_para_spacing / 2;
1330 else
1331 ldata->space_after = conf->base_leading / 2;
1332 ldata->page_break = FALSE;
1333 }
1334 }
1335
1336 static font_encoding *new_font_encoding(font_data *font)
1337 {
1338 font_encoding *fe;
1339 int i;
1340
1341 fe = snew(font_encoding);
1342 fe->next = NULL;
1343
1344 if (font->list->tail)
1345 font->list->tail->next = fe;
1346 else
1347 font->list->head = fe;
1348 font->list->tail = fe;
1349
1350 fe->font = font;
1351 fe->free_pos = 0x21;
1352
1353 for (i = 0; i < 256; i++) {
1354 fe->vector[i] = NOGLYPH;
1355 fe->to_unicode[i] = 0xFFFF;
1356 }
1357
1358 return fe;
1359 }
1360
1361 static subfont_map_entry *encode_glyph_at(glyph g, wchar_t u,
1362 font_encoding *fe, int pos)
1363 {
1364 subfont_map_entry *sme = snew(subfont_map_entry);
1365
1366 sme->subfont = fe;
1367 sme->position = pos;
1368 fe->vector[pos] = g;
1369 fe->to_unicode[pos] = u;
1370 add234(fe->font->subfont_map, sme);
1371 return sme;
1372 }
1373
1374 static int new_sfmap_cmp(void *a, void *b)
1375 {
1376 glyph ga = *(glyph *)a;
1377 subfont_map_entry *sb = b;
1378 glyph gb = sb->subfont->vector[sb->position];
1379
1380 if (ga < gb) return -1;
1381 if (ga > gb) return 1;
1382 return 0;
1383 }
1384
1385 static subfont_map_entry *encode_glyph(glyph g, wchar_t u, font_data *font)
1386 {
1387 subfont_map_entry *sme;
1388 int c;
1389
1390 sme = find234(font->subfont_map, &g, new_sfmap_cmp);
1391 if (sme) return sme;
1392
1393 /*
1394 * This character is not yet in a subfont. Assign one.
1395 */
1396 if (font->latest_subfont->free_pos >= 0x100)
1397 font->latest_subfont = new_font_encoding(font);
1398
1399 c = font->latest_subfont->free_pos++;
1400 if (font->latest_subfont->free_pos == 0x7F)
1401 font->latest_subfont->free_pos = 0xA1;
1402
1403 return encode_glyph_at(g, u, font->latest_subfont, c);
1404 }
1405
1406 static int sfmap_cmp(void *a, void *b)
1407 {
1408 subfont_map_entry *sa = a, *sb = b;
1409 glyph ga = sa->subfont->vector[sa->position];
1410 glyph gb = sb->subfont->vector[sb->position];
1411
1412 if (ga < gb) return -1;
1413 if (ga > gb) return 1;
1414 return 0;
1415 }
1416
1417 int width_cmp(void *a, void *b)
1418 {
1419 glyph_width const *wa = a, *wb = b;
1420
1421 if (wa->glyph < wb->glyph)
1422 return -1;
1423 if (wa->glyph > wb->glyph)
1424 return 1;
1425 return 0;
1426 }
1427
1428 int kern_cmp(void *a, void *b)
1429 {
1430 kern_pair const *ka = a, *kb = b;
1431
1432 if (ka->left < kb->left)
1433 return -1;
1434 if (ka->left > kb->left)
1435 return 1;
1436 if (ka->right < kb->right)
1437 return -1;
1438 if (ka->right > kb->right)
1439 return 1;
1440 return 0;
1441 }
1442
1443 int lig_cmp(void *a, void *b)
1444 {
1445 ligature const *la = a, *lb = b;
1446
1447 if (la->left < lb->left)
1448 return -1;
1449 if (la->left > lb->left)
1450 return 1;
1451 if (la->right < lb->right)
1452 return -1;
1453 if (la->right > lb->right)
1454 return 1;
1455 return 0;
1456 }
1457
1458 static int utoglyph(font_info const *fi, wchar_t u) {
1459 return (u < 0 || u > 0xFFFF ? NOGLYPH : fi->bmp[u]);
1460 }
1461
1462 static font_data *make_std_font(font_list *fontlist, char const *name)
1463 {
1464 font_info const *fi;
1465 font_data *f;
1466 font_encoding *fe;
1467 int i;
1468
1469 for (fe = fontlist->head; fe; fe = fe->next)
1470 if (strcmp(fe->font->info->name, name) == 0)
1471 return fe->font;
1472
1473 for (fi = all_fonts; fi; fi = fi->next)
1474 if (strcmp(fi->name, name) == 0) break;
1475 if (!fi) return NULL;
1476
1477 f = snew(font_data);
1478
1479 f->list = fontlist;
1480 f->info = fi;
1481 f->subfont_map = newtree234(sfmap_cmp);
1482
1483 /*
1484 * Our first subfont will contain all of US-ASCII. This isn't
1485 * really necessary - we could just create custom subfonts
1486 * precisely as the whim of render_string dictated - but
1487 * instinct suggests that it might be nice to have the text in
1488 * the output files look _marginally_ recognisable.
1489 */
1490 fe = new_font_encoding(f);
1491 fe->free_pos = 0xA1; /* only the top half is free */
1492 f->latest_subfont = fe;
1493
1494 for (i = 0x20; i <= 0x7E; i++) {
1495 glyph g = utoglyph(fi, i);
1496 if (g != NOGLYPH)
1497 encode_glyph_at(g, i, fe, i);
1498 }
1499
1500 return f;
1501 }
1502
1503 /* NB: arguments are glyph numbers from font->bmp. */
1504 int find_width(font_data *font, glyph index)
1505 {
1506 glyph_width wantw;
1507 glyph_width const *w;
1508
1509 wantw.glyph = index;
1510 w = find234(font->info->widths, &wantw, NULL);
1511 if (!w) return 0;
1512 return w->width;
1513 }
1514
1515 static int find_kern(font_data *font, int lindex, int rindex)
1516 {
1517 kern_pair wantkp;
1518 kern_pair const *kp;
1519
1520 if (lindex == NOGLYPH || rindex == NOGLYPH)
1521 return 0;
1522 wantkp.left = lindex;
1523 wantkp.right = rindex;
1524 kp = find234(font->info->kerns, &wantkp, NULL);
1525 if (kp == NULL)
1526 return 0;
1527 return kp->kern;
1528 }
1529
1530 static int find_lig(font_data *font, int lindex, int rindex)
1531 {
1532 ligature wantlig;
1533 ligature const *lig;
1534
1535 if (lindex == NOGLYPH || rindex == NOGLYPH)
1536 return NOGLYPH;
1537 wantlig.left = lindex;
1538 wantlig.right = rindex;
1539 lig = find234(font->info->ligs, &wantlig, NULL);
1540 if (lig == NULL)
1541 return NOGLYPH;
1542 return lig->lig;
1543 }
1544
1545 static int string_width(font_data *font, wchar_t const *string, int *errs,
1546 unsigned flags)
1547 {
1548 int width = 0;
1549 int nindex, index, oindex, lindex;
1550
1551 if (errs)
1552 *errs = 0;
1553
1554 oindex = NOGLYPH;
1555 index = utoglyph(font->info, *string);
1556 for (; *string; string++) {
1557 nindex = utoglyph(font->info, string[1]);
1558
1559 if (index == NOGLYPH) {
1560 if (errs)
1561 *errs = 1;
1562 } else {
1563 if (!(flags & RS_NOLIG) &&
1564 (lindex = find_lig(font, index, nindex)) != NOGLYPH) {
1565 index = lindex;
1566 continue;
1567 }
1568 width += find_kern(font, oindex, index) + find_width(font, index);
1569 }
1570 oindex = index;
1571 index = nindex;
1572 }
1573
1574 return width;
1575 }
1576
1577 static int paper_width_internal(void *vctx, word *word, int *nspaces);
1578
1579 struct paper_width_ctx {
1580 int minspacewidth;
1581 para_data *pdata;
1582 paper_conf *conf;
1583 };
1584
1585 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
1586 int w = 0;
1587 while (text && text != end) {
1588 w += paper_width_internal(vctx, text, nspaces);
1589 text = text->next;
1590 }
1591 return w;
1592 }
1593
1594 static int paper_width_internal(void *vctx, word *word, int *nspaces)
1595 {
1596 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
1597 int style, type, findex, width, errs;
1598 wchar_t *str;
1599 unsigned flags = 0;
1600
1601 switch (word->type) {
1602 case word_HyperLink:
1603 case word_HyperEnd:
1604 case word_UpperXref:
1605 case word_LowerXref:
1606 case word_PageXref:
1607 case word_XrefEnd:
1608 case word_IndexRef:
1609 return 0;
1610 }
1611
1612 style = towordstyle(word->type);
1613 type = removeattr(word->type);
1614
1615 findex = (style == word_Normal ? FONT_NORMAL :
1616 style == word_Emph ? FONT_EMPH :
1617 FONT_CODE);
1618
1619 if (style == word_Code || style == word_WeakCode) flags |= RS_NOLIG;
1620
1621 if (type == word_Normal) {
1622 str = word->text;
1623 } else if (type == word_WhiteSpace) {
1624 if (findex != FONT_CODE) {
1625 if (nspaces)
1626 (*nspaces)++;
1627 return ctx->minspacewidth;
1628 } else
1629 str = L" ";
1630 } else /* if (type == word_Quote) */ {
1631 if (word->aux == quote_Open)
1632 str = ctx->conf->lquote;
1633 else
1634 str = ctx->conf->rquote;
1635 }
1636
1637 width = string_width(ctx->pdata->fonts[findex], str, &errs, flags);
1638
1639 if (errs && word->alt)
1640 return paper_width_list(vctx, word->alt, NULL, nspaces);
1641 else
1642 return ctx->pdata->sizes[findex] * width;
1643 }
1644
1645 static int paper_width(void *vctx, word *word)
1646 {
1647 return paper_width_internal(vctx, word, NULL);
1648 }
1649
1650 static int paper_width_simple(para_data *pdata, word *text, paper_conf *conf)
1651 {
1652 struct paper_width_ctx ctx;
1653
1654 ctx.pdata = pdata;
1655 ctx.minspacewidth =
1656 (pdata->sizes[FONT_NORMAL] *
1657 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL, 0));
1658 ctx.conf = conf;
1659
1660 return paper_width_list(&ctx, text, NULL, NULL);
1661 }
1662
1663 static void wrap_paragraph(para_data *pdata, word *words,
1664 int w, int i1, int i2, paper_conf *conf)
1665 {
1666 wrappedline *wrapping, *p;
1667 int spacewidth;
1668 struct paper_width_ctx ctx;
1669 int line_height;
1670
1671 /*
1672 * We're going to need to store the line height in every line
1673 * structure we generate.
1674 */
1675 {
1676 int i;
1677 line_height = 0;
1678 for (i = 0; i < NFONTS; i++)
1679 if (line_height < pdata->sizes[i])
1680 line_height = pdata->sizes[i];
1681 line_height *= UNITS_PER_PT;
1682 }
1683
1684 spacewidth = (pdata->sizes[FONT_NORMAL] *
1685 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL, 0));
1686 if (spacewidth == 0) {
1687 /*
1688 * A font without a space?! Disturbing. I hope this never
1689 * comes up, but I'll make a random guess anyway and set my
1690 * space width to half the point size.
1691 */
1692 spacewidth = pdata->sizes[FONT_NORMAL] * UNITS_PER_PT / 2;
1693 }
1694
1695 /*
1696 * I'm going to set the _minimum_ space width to 3/5 of the
1697 * standard one, and use the standard one as the optimum.
1698 */
1699 ctx.minspacewidth = spacewidth * 3 / 5;
1700 ctx.pdata = pdata;
1701 ctx.conf = conf;
1702
1703 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
1704
1705 /*
1706 * Having done the wrapping, we now concoct a set of line_data
1707 * structures.
1708 */
1709 pdata->first = pdata->last = NULL;
1710
1711 for (p = wrapping; p; p = p->next) {
1712 line_data *ldata;
1713 word *wd;
1714 int len, wid, spaces;
1715
1716 ldata = snew(line_data);
1717
1718 ldata->pdata = pdata;
1719 ldata->first = p->begin;
1720 ldata->end = p->end;
1721 ldata->line_height = line_height;
1722
1723 ldata->xpos = (p == wrapping ? i1 : i2);
1724
1725 if (pdata->last) {
1726 pdata->last->next = ldata;
1727 ldata->prev = pdata->last;
1728 } else {
1729 pdata->first = ldata;
1730 ldata->prev = NULL;
1731 }
1732 ldata->next = NULL;
1733 pdata->last = ldata;
1734
1735 spaces = 0;
1736 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
1737 wid = (p == wrapping ? w - i1 : w - i2);
1738 wd = ldata->first;
1739
1740 ldata->hshortfall = wid - len;
1741 ldata->nspaces = spaces;
1742 /*
1743 * This tells us how much the space width needs to
1744 * change from _min_spacewidth. But we want to store
1745 * its difference from the _natural_ space width, to
1746 * make the text rendering easier.
1747 */
1748 ldata->hshortfall += ctx.minspacewidth * spaces;
1749 ldata->hshortfall -= spacewidth * spaces;
1750 ldata->real_shortfall = ldata->hshortfall;
1751 /*
1752 * Special case: on the last line of a paragraph, we
1753 * never stretch spaces.
1754 */
1755 if (ldata->hshortfall > 0 && !p->next)
1756 ldata->hshortfall = 0;
1757
1758 ldata->aux_text = NULL;
1759 ldata->aux_text_2 = NULL;
1760 ldata->aux_left_indent = 0;
1761 ldata->penalty_before = ldata->penalty_after = 0;
1762 }
1763
1764 }
1765
1766 static page_data *page_breaks(line_data *first, line_data *last,
1767 int page_height, int ncols, int headspace)
1768 {
1769 line_data *l, *m;
1770 page_data *ph, *pt;
1771 int n, n1, this_height;
1772
1773 /*
1774 * Page breaking is done by a close analogue of the optimal
1775 * paragraph wrapping algorithm used by wrap_para(). We work
1776 * backwards from the end of the document line by line; for
1777 * each line, we contemplate every possible number of lines we
1778 * could put on a page starting with that line, determine a
1779 * cost function for each one, add it to the pre-computed cost
1780 * function for optimally page-breaking everything after that
1781 * page, and pick the best option.
1782 *
1783 * This is made slightly more complex by the fact that we have
1784 * a multi-column index with a heading at the top of the
1785 * _first_ page, meaning that the first _ncols_ pages must have
1786 * a different length. Hence, we must do the wrapping ncols+1
1787 * times over, hypothetically trying to put every subsequence
1788 * on every possible page.
1789 *
1790 * Since my line_data structures are only used for this
1791 * purpose, I might as well just store the algorithm data
1792 * directly in them.
1793 */
1794
1795 for (l = last; l; l = l->prev) {
1796 l->bestcost = snewn(ncols+1, int);
1797 l->vshortfall = snewn(ncols+1, int);
1798 l->text = snewn(ncols+1, int);
1799 l->space = snewn(ncols+1, int);
1800 l->page_last = snewn(ncols+1, line_data *);
1801
1802 for (n = 0; n <= ncols; n++) {
1803 int minheight, text = 0, space = 0;
1804 int cost;
1805
1806 n1 = (n < ncols ? n+1 : ncols);
1807 if (n < ncols)
1808 this_height = page_height - headspace;
1809 else
1810 this_height = page_height;
1811
1812 l->bestcost[n] = -1;
1813 for (m = l; m; m = m->next) {
1814 if (m != l && m->page_break)
1815 break; /* we've gone as far as we can */
1816
1817 if (m != l) {
1818 if (m->prev->space_after > 0)
1819 space += m->prev->space_after;
1820 else
1821 text += m->prev->space_after;
1822 }
1823 if (m != l || m->page_break) {
1824 if (m->space_before > 0)
1825 space += m->space_before;
1826 else
1827 text += m->space_before;
1828 }
1829 text += m->line_height;
1830 minheight = text + space;
1831
1832 if (m != l && minheight > this_height)
1833 break;
1834
1835 /*
1836 * If the space after this paragraph is _negative_
1837 * (which means the next line is folded on to this
1838 * one, which happens in the index), we absolutely
1839 * cannot break here.
1840 */
1841 if (m->space_after >= 0) {
1842
1843 /*
1844 * Compute the cost of this arrangement, as the
1845 * square of the amount of wasted space on the
1846 * page. Exception: if this is the last page
1847 * before a mandatory break or the document
1848 * end, we don't penalise a large blank area.
1849 */
1850 if (m != last && m->next && !m->next->page_break)
1851 {
1852 int x = (this_height - minheight) / FUNITS_PER_PT *
1853 4096.0;
1854 int xf;
1855
1856 xf = x & 0xFF;
1857 x >>= 8;
1858
1859 cost = x*x;
1860 cost += (x * xf) >> 8;
1861 } else
1862 cost = 0;
1863
1864 if (m != last && m->next && !m->next->page_break) {
1865 cost += m->penalty_after;
1866 cost += m->next->penalty_before;
1867 }
1868
1869 if (m != last && m->next && !m->next->page_break)
1870 cost += m->next->bestcost[n1];
1871 if (l->bestcost[n] == -1 || l->bestcost[n] > cost) {
1872 /*
1873 * This is the best option yet for this
1874 * starting point.
1875 */
1876 l->bestcost[n] = cost;
1877 if (m != last && m->next && !m->next->page_break)
1878 l->vshortfall[n] = this_height - minheight;
1879 else
1880 l->vshortfall[n] = 0;
1881 l->text[n] = text;
1882 l->space[n] = space;
1883 l->page_last[n] = m;
1884 }
1885 }
1886
1887 if (m == last)
1888 break;
1889 }
1890 }
1891 }
1892
1893 /*
1894 * Now go through the line list forwards and assemble the
1895 * actual pages.
1896 */
1897 ph = pt = NULL;
1898
1899 l = first;
1900 n = 0;
1901 while (l) {
1902 page_data *page;
1903 int text, space, head;
1904
1905 page = snew(page_data);
1906 page->next = NULL;
1907 page->prev = pt;
1908 if (pt)
1909 pt->next = page;
1910 else
1911 ph = page;
1912 pt = page;
1913
1914 page->first_line = l;
1915 page->last_line = l->page_last[n];
1916
1917 page->first_text = page->last_text = NULL;
1918 page->first_xref = page->last_xref = NULL;
1919 page->first_rect = page->last_rect = NULL;
1920
1921 /*
1922 * Now assign a y-coordinate to each line on the page.
1923 */
1924 text = space = 0;
1925 head = (n < ncols ? headspace : 0);
1926 for (l = page->first_line; l; l = l->next) {
1927 if (l != page->first_line) {
1928 if (l->prev->space_after > 0)
1929 space += l->prev->space_after;
1930 else
1931 text += l->prev->space_after;
1932 }
1933 if (l != page->first_line || l->page_break) {
1934 if (l->space_before > 0)
1935 space += l->space_before;
1936 else
1937 text += l->space_before;
1938 }
1939 text += l->line_height;
1940
1941 l->page = page;
1942 l->ypos = text + space + head;
1943 if (page->first_line->space[n]) {
1944 l->ypos += space * (float)page->first_line->vshortfall[n] /
1945 page->first_line->space[n];
1946 }
1947
1948 if (l == page->last_line)
1949 break;
1950 }
1951
1952 l = page->last_line;
1953 if (l == last)
1954 break;
1955 l = l->next;
1956
1957 n = (n < ncols ? n+1 : ncols);
1958 }
1959
1960 return ph;
1961 }
1962
1963 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
1964 {
1965 rect *r = snew(rect);
1966
1967 r->next = NULL;
1968 if (page->last_rect)
1969 page->last_rect->next = r;
1970 else
1971 page->first_rect = r;
1972 page->last_rect = r;
1973
1974 r->x = x;
1975 r->y = y;
1976 r->w = w;
1977 r->h = h;
1978 }
1979
1980 static void add_string_to_page(page_data *page, int x, int y,
1981 font_encoding *fe, int size, char *text,
1982 int width)
1983 {
1984 text_fragment *frag;
1985
1986 frag = snew(text_fragment);
1987 frag->next = NULL;
1988
1989 if (page->last_text)
1990 page->last_text->next = frag;
1991 else
1992 page->first_text = frag;
1993 page->last_text = frag;
1994
1995 frag->x = x;
1996 frag->y = y;
1997 frag->fe = fe;
1998 frag->fontsize = size;
1999 frag->text = dupstr(text);
2000 frag->width = width;
2001 }
2002
2003 /*
2004 * Returns the updated x coordinate.
2005 */
2006 static int render_string(page_data *page, font_data *font, int fontsize,
2007 int x, int y, wchar_t *str, unsigned flags)
2008 {
2009 char *text;
2010 int textpos, textwid, kern, nglyph, glyph, oglyph, lig;
2011 font_encoding *subfont = NULL, *sf;
2012 subfont_map_entry *sme;
2013
2014 text = snewn(1 + ustrlen(str), char);
2015 textpos = textwid = 0;
2016
2017 glyph = NOGLYPH;
2018 nglyph = utoglyph(font->info, *str);
2019 while (*str) {
2020 oglyph = glyph;
2021 glyph = nglyph;
2022 nglyph = utoglyph(font->info, str[1]);
2023
2024 if (glyph == NOGLYPH) {
2025 str++;
2026 continue; /* nothing more we can do here */
2027 }
2028
2029 if (!(flags & RS_NOLIG) &&
2030 (lig = find_lig(font, glyph, nglyph)) != NOGLYPH) {
2031 nglyph = lig;
2032 str++;
2033 continue;
2034 }
2035
2036 /*
2037 * Find which subfont this character is going in.
2038 */
2039 sme = encode_glyph(glyph, *str, font);
2040 sf = sme->subfont;
2041
2042 kern = find_kern(font, oglyph, glyph) * fontsize;
2043
2044 if (!subfont || sf != subfont || kern) {
2045 if (subfont) {
2046 text[textpos] = '\0';
2047 add_string_to_page(page, x, y, subfont, fontsize, text,
2048 textwid);
2049 x += textwid + kern;
2050 } else {
2051 assert(textpos == 0);
2052 }
2053 textpos = 0;
2054 textwid = 0;
2055 subfont = sf;
2056 }
2057
2058 text[textpos++] = sme->position;
2059 textwid += find_width(font, glyph) * fontsize;
2060
2061 str++;
2062 }
2063
2064 if (textpos > 0) {
2065 text[textpos] = '\0';
2066 add_string_to_page(page, x, y, subfont, fontsize, text, textwid);
2067 x += textwid;
2068 }
2069
2070 return x;
2071 }
2072
2073 /*
2074 * Returns the updated x coordinate.
2075 */
2076 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
2077 int x, int y, word *text, word *text_end, xref **xr,
2078 int shortfall, int nspaces, int *nspace,
2079 keywordlist *keywords, indexdata *idx, paper_conf *conf)
2080 {
2081 while (text && text != text_end) {
2082 int style, type, findex, errs;
2083 wchar_t *str;
2084 xref_dest dest;
2085 unsigned flags = 0;
2086
2087 switch (text->type) {
2088 /*
2089 * Start a cross-reference.
2090 */
2091 case word_HyperLink:
2092 case word_UpperXref:
2093 case word_LowerXref:
2094 case word_PageXref:
2095
2096 if (text->type == word_HyperLink) {
2097 dest.type = URL;
2098 dest.url = utoa_dup(text->text, CS_ASCII);
2099 dest.page = NULL;
2100 } else if (text->type == word_PageXref) {
2101 dest.type = PAGE;
2102 dest.url = NULL;
2103 dest.page = (page_data *)text->private_data;
2104 } else {
2105 keyword *kwl = kw_lookup(keywords, text->text);
2106 para_data *pdata;
2107
2108 if (kwl) {
2109 assert(kwl->para->private_data);
2110 pdata = (para_data *) kwl->para->private_data;
2111 dest.type = PAGE;
2112 dest.page = pdata->first->page;
2113 dest.url = NULL;
2114 } else {
2115 /*
2116 * Shouldn't happen, but *shrug*
2117 */
2118 dest.type = NONE;
2119 dest.page = NULL;
2120 dest.url = NULL;
2121 }
2122 }
2123 if (dest.type != NONE) {
2124 *xr = snew(xref);
2125 (*xr)->dest = dest; /* structure copy */
2126 if (page->last_xref)
2127 page->last_xref->next = *xr;
2128 else
2129 page->first_xref = *xr;
2130 page->last_xref = *xr;
2131 (*xr)->next = NULL;
2132
2133 /*
2134 * FIXME: Ideally we should have, and use, some
2135 * vertical font metric information here so that
2136 * our cross-ref rectangle can take account of
2137 * descenders and the font's cap height. This will
2138 * do for the moment, but it isn't ideal.
2139 */
2140 (*xr)->lx = (*xr)->rx = x;
2141 (*xr)->by = y;
2142 (*xr)->ty = y + ldata->line_height;
2143 }
2144 goto nextword;
2145
2146 /*
2147 * Finish extending a cross-reference box.
2148 */
2149 case word_HyperEnd:
2150 case word_XrefEnd:
2151 *xr = NULL;
2152 goto nextword;
2153
2154 /*
2155 * Add the current page number to the list of pages
2156 * referenced by an index entry.
2157 */
2158 case word_IndexRef:
2159 /*
2160 * We don't create index references in contents entries.
2161 */
2162 if (!pdata->contents_entry) {
2163 indextag *tag;
2164 int i;
2165
2166 tag = index_findtag(idx, text->text);
2167 if (!tag)
2168 goto nextword;
2169
2170 for (i = 0; i < tag->nrefs; i++) {
2171 indexentry *entry = tag->refs[i];
2172 paper_idx *pi = (paper_idx *)entry->backend_data;
2173
2174 /*
2175 * If the same index term is indexed twice
2176 * within the same section, we only want to
2177 * mention it once in the index.
2178 */
2179 if (pi->lastpage != page) {
2180 word **wp;
2181
2182 if (pi->lastword) {
2183 pi->lastword = pi->lastword->next =
2184 fake_word(L",");
2185 pi->lastword = pi->lastword->next =
2186 fake_space_word();
2187 wp = &pi->lastword->next;
2188 } else
2189 wp = &pi->words;
2190
2191 pi->lastword = *wp =
2192 fake_page_ref(page);
2193 pi->lastword = pi->lastword->next =
2194 fake_word(page->number);
2195 pi->lastword = pi->lastword->next =
2196 fake_end_ref();
2197 }
2198
2199 pi->lastpage = page;
2200 }
2201 }
2202 goto nextword;
2203 }
2204
2205 style = towordstyle(text->type);
2206 type = removeattr(text->type);
2207
2208 findex = (style == word_Normal ? FONT_NORMAL :
2209 style == word_Emph ? FONT_EMPH :
2210 FONT_CODE);
2211
2212 if (style == word_Code || style == word_WeakCode) flags |= RS_NOLIG;
2213 flags |= pdata->extraflags;
2214
2215 if (type == word_Normal) {
2216 str = text->text;
2217 } else if (type == word_WhiteSpace) {
2218 x += pdata->sizes[findex] *
2219 string_width(pdata->fonts[findex], L" ", NULL, 0);
2220 if (nspaces && findex != FONT_CODE) {
2221 x += (*nspace+1) * shortfall / nspaces;
2222 x -= *nspace * shortfall / nspaces;
2223 (*nspace)++;
2224 }
2225 goto nextword;
2226 } else /* if (type == word_Quote) */ {
2227 if (text->aux == quote_Open)
2228 str = conf->lquote;
2229 else
2230 str = conf->rquote;
2231 }
2232
2233 (void) string_width(pdata->fonts[findex], str, &errs, flags);
2234
2235 if (errs && text->alt)
2236 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
2237 xr, shortfall, nspaces, nspace, keywords, idx,
2238 conf);
2239 else
2240 x = render_string(page, pdata->fonts[findex],
2241 pdata->sizes[findex], x, y, str, flags);
2242
2243 if (*xr)
2244 (*xr)->rx = x;
2245
2246 nextword:
2247 text = text->next;
2248 }
2249
2250 return x;
2251 }
2252
2253 /*
2254 * Returns the last x position used on the line.
2255 */
2256 static int render_line(line_data *ldata, int left_x, int top_y,
2257 xref_dest *dest, keywordlist *keywords, indexdata *idx,
2258 paper_conf *conf)
2259 {
2260 int nspace;
2261 xref *xr;
2262 int ret = 0;
2263
2264 if (ldata->aux_text) {
2265 int x;
2266 xr = NULL;
2267 nspace = 0;
2268 x = render_text(ldata->page, ldata->pdata, ldata,
2269 left_x + ldata->aux_left_indent,
2270 top_y - ldata->ypos,
2271 ldata->aux_text, NULL, &xr, 0, 0, &nspace,
2272 keywords, idx, conf);
2273 if (ldata->aux_text_2)
2274 render_text(ldata->page, ldata->pdata, ldata,
2275 x, top_y - ldata->ypos,
2276 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace,
2277 keywords, idx, conf);
2278 }
2279 nspace = 0;
2280
2281 if (ldata->first) {
2282 /*
2283 * There might be a cross-reference carried over from a
2284 * previous line.
2285 */
2286 if (dest->type != NONE) {
2287 xr = snew(xref);
2288 xr->next = NULL;
2289 xr->dest = *dest; /* structure copy */
2290 if (ldata->page->last_xref)
2291 ldata->page->last_xref->next = xr;
2292 else
2293 ldata->page->first_xref = xr;
2294 ldata->page->last_xref = xr;
2295 xr->lx = xr->rx = left_x + ldata->xpos;
2296 xr->by = top_y - ldata->ypos;
2297 xr->ty = top_y - ldata->ypos + ldata->line_height;
2298 } else
2299 xr = NULL;
2300
2301 {
2302 int extra_indent, shortfall, spaces;
2303 int just = ldata->pdata->justification;
2304
2305 /*
2306 * All forms of justification become JUST when we have
2307 * to squeeze the paragraph.
2308 */
2309 if (ldata->hshortfall < 0)
2310 just = JUST;
2311
2312 switch (just) {
2313 case JUST:
2314 shortfall = ldata->hshortfall;
2315 spaces = ldata->nspaces;
2316 extra_indent = 0;
2317 break;
2318 case LEFT:
2319 shortfall = spaces = extra_indent = 0;
2320 break;
2321 case RIGHT:
2322 shortfall = spaces = 0;
2323 extra_indent = ldata->real_shortfall;
2324 break;
2325 }
2326
2327 ret = render_text(ldata->page, ldata->pdata, ldata,
2328 left_x + ldata->xpos + extra_indent,
2329 top_y - ldata->ypos, ldata->first, ldata->end,
2330 &xr, shortfall, spaces, &nspace,
2331 keywords, idx, conf);
2332 }
2333
2334 if (xr) {
2335 /*
2336 * There's a cross-reference continued on to the next line.
2337 */
2338 *dest = xr->dest;
2339 } else
2340 dest->type = NONE;
2341 }
2342
2343 return ret;
2344 }
2345
2346 static void render_para(para_data *pdata, paper_conf *conf,
2347 keywordlist *keywords, indexdata *idx,
2348 paragraph *index_placeholder, page_data *index_page)
2349 {
2350 int last_x;
2351 xref *cxref;
2352 page_data *cxref_page;
2353 xref_dest dest;
2354 para_data *target;
2355 line_data *ldata;
2356
2357 dest.type = NONE;
2358 cxref = NULL;
2359 cxref_page = NULL;
2360
2361 for (ldata = pdata->first; ldata; ldata = ldata->next) {
2362 /*
2363 * If this is a contents entry, we expect to have a single
2364 * enormous cross-reference rectangle covering the whole
2365 * thing. (Unless, of course, it spans multiple pages.)
2366 */
2367 if (pdata->contents_entry && ldata->page != cxref_page) {
2368 cxref_page = ldata->page;
2369 cxref = snew(xref);
2370 cxref->next = NULL;
2371 cxref->dest.type = PAGE;
2372 if (pdata->contents_entry == index_placeholder) {
2373 cxref->dest.page = index_page;
2374 } else {
2375 assert(pdata->contents_entry->private_data);
2376 target = (para_data *)pdata->contents_entry->private_data;
2377 cxref->dest.page = target->first->page;
2378 }
2379 cxref->dest.url = NULL;
2380 if (ldata->page->last_xref)
2381 ldata->page->last_xref->next = cxref;
2382 else
2383 ldata->page->first_xref = cxref;
2384 ldata->page->last_xref = cxref;
2385 cxref->lx = conf->left_margin;
2386 cxref->rx = conf->paper_width - conf->right_margin;
2387 cxref->ty = conf->paper_height - conf->top_margin
2388 - ldata->ypos + ldata->line_height;
2389 }
2390 if (pdata->contents_entry) {
2391 assert(cxref != NULL);
2392 cxref->by = conf->paper_height - conf->top_margin
2393 - ldata->ypos;
2394 }
2395
2396 last_x = render_line(ldata, conf->left_margin,
2397 conf->paper_height - conf->top_margin,
2398 &dest, keywords, idx, conf);
2399 if (ldata == pdata->last)
2400 break;
2401 }
2402
2403 /*
2404 * If this is a contents entry, add leaders and a page
2405 * number.
2406 */
2407 if (pdata->contents_entry) {
2408 word *w;
2409 wchar_t *num;
2410 int wid;
2411 int x;
2412
2413 if (pdata->contents_entry == index_placeholder) {
2414 num = index_page->number;
2415 } else {
2416 assert(pdata->contents_entry->private_data);
2417 target = (para_data *)pdata->contents_entry->private_data;
2418 num = target->first->page->number;
2419 }
2420
2421 w = fake_word(num);
2422 wid = paper_width_simple(pdata, w, conf);
2423 sfree(w);
2424
2425 for (x = 0; x < conf->base_width; x += conf->leader_separation)
2426 if (x - conf->leader_separation > last_x - conf->left_margin &&
2427 x + conf->leader_separation < conf->base_width - wid)
2428 render_string(pdata->last->page,
2429 pdata->fonts[FONT_NORMAL],
2430 pdata->sizes[FONT_NORMAL],
2431 conf->left_margin + x,
2432 (conf->paper_height - conf->top_margin -
2433 pdata->last->ypos), L".", 0);
2434
2435 render_string(pdata->last->page,
2436 pdata->fonts[FONT_NORMAL],
2437 pdata->sizes[FONT_NORMAL],
2438 conf->paper_width - conf->right_margin - wid,
2439 (conf->paper_height - conf->top_margin -
2440 pdata->last->ypos), num, 0);
2441 }
2442
2443 /*
2444 * Render any rectangle (chapter title underline or rule)
2445 * that goes with this paragraph.
2446 */
2447 switch (pdata->rect_type) {
2448 case RECT_CHAPTER_UNDERLINE:
2449 add_rect_to_page(pdata->last->page,
2450 conf->left_margin,
2451 (conf->paper_height - conf->top_margin -
2452 pdata->last->ypos -
2453 conf->chapter_underline_depth),
2454 conf->base_width,
2455 conf->chapter_underline_thickness);
2456 break;
2457 case RECT_RULE:
2458 add_rect_to_page(pdata->first->page,
2459 conf->left_margin + pdata->first->xpos,
2460 (conf->paper_height - conf->top_margin -
2461 pdata->last->ypos -
2462 pdata->last->line_height),
2463 conf->base_width - pdata->first->xpos,
2464 pdata->last->line_height);
2465 break;
2466 default: /* placate gcc */
2467 break;
2468 }
2469 }
2470
2471 static para_data *code_paragraph(int indent, word *words, paper_conf *conf)
2472 {
2473 para_data *pdata = snew(para_data);
2474
2475 /*
2476 * For code paragraphs, I'm going to hack grievously and
2477 * pretend the three normal fonts are the three code paragraph
2478 * fonts.
2479 */
2480 setfont(pdata, &conf->fcode);
2481
2482 pdata->first = pdata->last = NULL;
2483 pdata->outline_level = -1;
2484 pdata->rect_type = RECT_NONE;
2485 pdata->contents_entry = NULL;
2486 pdata->justification = LEFT;
2487 pdata->extraflags = RS_NOLIG;
2488
2489 for (; words; words = words->next) {
2490 wchar_t *t, *e, *start;
2491 word *lhead = NULL, *ltail = NULL, *w;
2492 line_data *ldata;
2493 int prev = -1, curr;
2494
2495 t = words->text;
2496 if (words->next && words->next->type == word_Emph) {
2497 e = words->next->text;
2498 words = words->next;
2499 } else
2500 e = NULL;
2501
2502 start = t;
2503
2504 while (*start) {
2505 while (*t) {
2506 if (!e || !*e)
2507 curr = 0;
2508 else if (*e == L'i')
2509 curr = 1;
2510 else if (*e == L'b')
2511 curr = 2;
2512 else
2513 curr = 0;
2514
2515 if (prev < 0)
2516 prev = curr;
2517
2518 if (curr != prev)
2519 break;
2520
2521 t++;
2522 if (e && *e)
2523 e++;
2524 }
2525
2526 /*
2527 * We've isolated a maximal subsequence of the line
2528 * which has the same emphasis. Form it into a word
2529 * structure.
2530 */
2531 w = snew(word);
2532 w->next = NULL;
2533 w->alt = NULL;
2534 w->type = (prev == 0 ? word_WeakCode :
2535 prev == 1 ? word_Emph : word_Normal);
2536 w->text = snewn(t-start+1, wchar_t);
2537 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
2538 w->text[t-start] = '\0';
2539 w->breaks = FALSE;
2540
2541 if (ltail)
2542 ltail->next = w;
2543 else
2544 lhead = w;
2545 ltail = w;
2546
2547 start = t;
2548 prev = -1;
2549 }
2550
2551 ldata = snew(line_data);
2552
2553 ldata->pdata = pdata;
2554 ldata->first = lhead;
2555 ldata->end = NULL;
2556 ldata->line_height = conf->fcode.font_size * UNITS_PER_PT;
2557
2558 ldata->xpos = indent;
2559
2560 if (pdata->last) {
2561 pdata->last->next = ldata;
2562 ldata->prev = pdata->last;
2563 } else {
2564 pdata->first = ldata;
2565 ldata->prev = NULL;
2566 }
2567 ldata->next = NULL;
2568 pdata->last = ldata;
2569
2570 ldata->hshortfall = 0;
2571 ldata->nspaces = 0;
2572 ldata->aux_text = NULL;
2573 ldata->aux_text_2 = NULL;
2574 ldata->aux_left_indent = 0;
2575 /* General opprobrium for breaking in a code paragraph. */
2576 ldata->penalty_before = ldata->penalty_after = 50000;
2577 }
2578
2579 standard_line_spacing(pdata, conf);
2580
2581 return pdata;
2582 }
2583
2584 static para_data *rule_paragraph(int indent, paper_conf *conf)
2585 {
2586 para_data *pdata = snew(para_data);
2587 line_data *ldata;
2588
2589 ldata = snew(line_data);
2590
2591 ldata->pdata = pdata;
2592 ldata->first = NULL;
2593 ldata->end = NULL;
2594 ldata->line_height = conf->rule_thickness;
2595
2596 ldata->xpos = indent;
2597
2598 ldata->prev = NULL;
2599 ldata->next = NULL;
2600
2601 ldata->hshortfall = 0;
2602 ldata->nspaces = 0;
2603 ldata->aux_text = NULL;
2604 ldata->aux_text_2 = NULL;
2605 ldata->aux_left_indent = 0;
2606
2607 /*
2608 * Better to break after a rule than before it
2609 */
2610 ldata->penalty_after += 100000;
2611 ldata->penalty_before += -100000;
2612
2613 pdata->first = pdata->last = ldata;
2614 pdata->outline_level = -1;
2615 pdata->rect_type = RECT_RULE;
2616 pdata->contents_entry = NULL;
2617 pdata->justification = LEFT;
2618 pdata->extraflags = 0;
2619
2620 standard_line_spacing(pdata, conf);
2621
2622 return pdata;
2623 }
2624
2625 /*
2626 * Plain-text-like formatting for outline titles.
2627 */
2628 static void paper_rdaddw(rdstring *rs, word *text) {
2629 for (; text; text = text->next) switch (text->type) {
2630 case word_HyperLink:
2631 case word_HyperEnd:
2632 case word_UpperXref:
2633 case word_LowerXref:
2634 case word_XrefEnd:
2635 case word_IndexRef:
2636 break;
2637
2638 case word_Normal:
2639 case word_Emph:
2640 case word_Code:
2641 case word_WeakCode:
2642 case word_WhiteSpace:
2643 case word_EmphSpace:
2644 case word_CodeSpace:
2645 case word_WkCodeSpace:
2646 case word_Quote:
2647 case word_EmphQuote:
2648 case word_CodeQuote:
2649 case word_WkCodeQuote:
2650 assert(text->type != word_CodeQuote &&
2651 text->type != word_WkCodeQuote);
2652 if (towordstyle(text->type) == word_Emph &&
2653 (attraux(text->aux) == attr_First ||
2654 attraux(text->aux) == attr_Only))
2655 rdadd(rs, L'_'); /* FIXME: configurability */
2656 else if (towordstyle(text->type) == word_Code &&
2657 (attraux(text->aux) == attr_First ||
2658 attraux(text->aux) == attr_Only))
2659 rdadd(rs, L'\''); /* FIXME: configurability */
2660 if (removeattr(text->type) == word_Normal) {
2661 rdadds(rs, text->text);
2662 } else if (removeattr(text->type) == word_WhiteSpace) {
2663 rdadd(rs, L' ');
2664 } else if (removeattr(text->type) == word_Quote) {
2665 rdadd(rs, L'\''); /* fixme: configurability */
2666 }
2667 if (towordstyle(text->type) == word_Emph &&
2668 (attraux(text->aux) == attr_Last ||
2669 attraux(text->aux) == attr_Only))
2670 rdadd(rs, L'_'); /* FIXME: configurability */
2671 else if (towordstyle(text->type) == word_Code &&
2672 (attraux(text->aux) == attr_Last ||
2673 attraux(text->aux) == attr_Only))
2674 rdadd(rs, L'\''); /* FIXME: configurability */
2675 break;
2676 }
2677 }
2678
2679 static wchar_t *prepare_outline_title(word *first, wchar_t *separator,
2680 word *second)
2681 {
2682 rdstring rs = {0, 0, NULL};
2683
2684 if (first)
2685 paper_rdaddw(&rs, first);
2686 if (separator)
2687 rdadds(&rs, separator);
2688 if (second)
2689 paper_rdaddw(&rs, second);
2690
2691 return rs.text;
2692 }
2693
2694 static word *fake_word(wchar_t *text)
2695 {
2696 word *ret = snew(word);
2697 ret->next = NULL;
2698 ret->alt = NULL;
2699 ret->type = word_Normal;
2700 ret->text = ustrdup(text);
2701 ret->breaks = FALSE;
2702 ret->aux = 0;
2703 return ret;
2704 }
2705
2706 static word *fake_space_word(void)
2707 {
2708 word *ret = snew(word);
2709 ret->next = NULL;
2710 ret->alt = NULL;
2711 ret->type = word_WhiteSpace;
2712 ret->text = NULL;
2713 ret->breaks = TRUE;
2714 ret->aux = 0;
2715 return ret;
2716 }
2717
2718 static word *fake_page_ref(page_data *page)
2719 {
2720 word *ret = snew(word);
2721 ret->next = NULL;
2722 ret->alt = NULL;
2723 ret->type = word_PageXref;
2724 ret->text = NULL;
2725 ret->breaks = FALSE;
2726 ret->aux = 0;
2727 ret->private_data = page;
2728 return ret;
2729 }
2730
2731 static word *fake_end_ref(void)
2732 {
2733 word *ret = snew(word);
2734 ret->next = NULL;
2735 ret->alt = NULL;
2736 ret->type = word_XrefEnd;
2737 ret->text = NULL;
2738 ret->breaks = FALSE;
2739 ret->aux = 0;
2740 return ret;
2741 }
2742
2743 static word *prepare_contents_title(word *first, wchar_t *separator,
2744 word *second)
2745 {
2746 word *ret;
2747 word **wptr, *w;
2748
2749 wptr = &ret;
2750
2751 if (first) {
2752 w = dup_word_list(first);
2753 *wptr = w;
2754 while (w->next)
2755 w = w->next;
2756 wptr = &w->next;
2757 }
2758
2759 if (separator) {
2760 w = fake_word(separator);
2761 *wptr = w;
2762 wptr = &w->next;
2763 }
2764
2765 if (second) {
2766 *wptr = dup_word_list(second);
2767 }
2768
2769 return ret;
2770 }
2771
2772 static void fold_into_page(page_data *dest, page_data *src, int right_shift)
2773 {
2774 line_data *ldata;
2775
2776 if (!src->first_line)
2777 return;
2778
2779 if (dest->last_line) {
2780 dest->last_line->next = src->first_line;
2781 src->first_line->prev = dest->last_line;
2782 }
2783 dest->last_line = src->last_line;
2784
2785 for (ldata = src->first_line; ldata; ldata = ldata->next) {
2786 ldata->page = dest;
2787 ldata->xpos += right_shift;
2788
2789 if (ldata == src->last_line)
2790 break;
2791 }
2792 }