And now the page numbers in the index are PDF cross-references too.
[sgt/halibut] / bk_paper.c
1 /*
2 * Paper printing pre-backend for Halibut.
3 *
4 * This module does all the processing common to both PostScript
5 * and PDF output: selecting fonts, line wrapping and page breaking
6 * in accordance with font metrics, laying out the contents and
7 * index pages, generally doing all the page layout. After this,
8 * bk_ps.c and bk_pdf.c should only need to do linear translations
9 * into their literal output format.
10 */
11
12 /*
13 * To be done:
14 *
15 * - header/footer? Page numbers at least would be handy. Fully
16 * configurable footer can wait, though.
17 *
18 * That should bring us to the same level of functionality that
19 * original-Halibut had, and the same in PDF plus the obvious
20 * interactive navigation features. After that, in future work:
21 *
22 * - linearised PDF, perhaps?
23 *
24 * - I'm uncertain of whether I need to include a ToUnicode CMap
25 * in each of my font definitions in PDF. Currently things (by
26 * which I mean cut and paste out of acroread) seem to be
27 * working fairly happily without it, but I don't know.
28 *
29 * - configurability
30 *
31 * - title pages
32 */
33
34 #include <assert.h>
35 #include <stdio.h>
36
37 #include "halibut.h"
38 #include "paper.h"
39
40 typedef struct paper_conf_Tag paper_conf;
41 typedef struct paper_idx_Tag paper_idx;
42
43 struct paper_conf_Tag {
44 int paper_width;
45 int paper_height;
46 int left_margin;
47 int top_margin;
48 int right_margin;
49 int bottom_margin;
50 int indent_list_bullet;
51 int indent_list;
52 int indent_quote;
53 int base_leading;
54 int base_para_spacing;
55 int chapter_top_space;
56 int sect_num_left_space;
57 int chapter_underline_depth;
58 int chapter_underline_thickness;
59 int rule_thickness;
60 int base_font_size;
61 int contents_indent_step;
62 int contents_margin;
63 int leader_separation;
64 int index_gutter;
65 int index_cols;
66 int index_minsep;
67 /* These are derived from the above */
68 int base_width;
69 int page_height;
70 int index_colwidth;
71 /* Fonts used in the configuration */
72 font_data *tr, *ti, *hr, *hi, *cr, *co, *cb;
73 };
74
75 struct paper_idx_Tag {
76 /*
77 * Word list giving the page numbers on which this index entry
78 * appears. Also the last word in the list, for ease of
79 * construction.
80 */
81 word *words;
82 word *lastword;
83 /*
84 * The last page added to the list (so we can ensure we don't
85 * add one twice).
86 */
87 page_data *lastpage;
88 };
89
90 enum {
91 word_PageXref = word_NotWordType + 1
92 };
93
94 static font_data *make_std_font(font_list *fontlist, char const *name);
95 static void wrap_paragraph(para_data *pdata, word *words,
96 int w, int i1, int i2);
97 static page_data *page_breaks(line_data *first, line_data *last,
98 int page_height, int ncols, int headspace);
99 static int render_string(page_data *page, font_data *font, int fontsize,
100 int x, int y, wchar_t *str);
101 static int render_line(line_data *ldata, int left_x, int top_y,
102 xref_dest *dest, keywordlist *keywords, indexdata *idx);
103 static void render_para(para_data *pdata, paper_conf *conf,
104 keywordlist *keywords, indexdata *idx,
105 paragraph *index_placeholder, page_data *index_page);
106 static int paper_width_simple(para_data *pdata, word *text);
107 static para_data *code_paragraph(int indent, word *words, paper_conf *conf);
108 static para_data *rule_paragraph(int indent, paper_conf *conf);
109 static void add_rect_to_page(page_data *page, int x, int y, int w, int h);
110 static para_data *make_para_data(int ptype, int paux, int indent, int rmargin,
111 word *pkwtext, word *pkwtext2, word *pwords,
112 paper_conf *conf);
113 static void standard_line_spacing(para_data *pdata, paper_conf *conf);
114 static wchar_t *prepare_outline_title(word *first, wchar_t *separator,
115 word *second);
116 static word *fake_word(wchar_t *text);
117 static word *fake_space_word(void);
118 static word *fake_page_ref(page_data *page);
119 static word *fake_end_ref(void);
120 static word *prepare_contents_title(word *first, wchar_t *separator,
121 word *second);
122 static void fold_into_page(page_data *dest, page_data *src, int right_shift);
123
124 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
125 indexdata *idx) {
126 paragraph *p;
127 document *doc;
128 int indent, used_contents;
129 para_data *pdata, *firstpara = NULL, *lastpara = NULL;
130 para_data *firstcont, *lastcont;
131 line_data *firstline, *lastline, *firstcontline, *lastcontline;
132 page_data *pages;
133 font_list *fontlist;
134 paper_conf *conf;
135 int has_index;
136 int pagenum;
137 paragraph index_placeholder_para;
138 page_data *first_index_page;
139
140 /*
141 * FIXME: All these things ought to become configurable.
142 */
143 conf = mknew(paper_conf);
144 conf->paper_width = 595 * 4096;
145 conf->paper_height = 841 * 4096;
146 conf->left_margin = 72 * 4096;
147 conf->top_margin = 72 * 4096;
148 conf->right_margin = 72 * 4096;
149 conf->bottom_margin = 108 * 4096;
150 conf->indent_list_bullet = 6 * 4096;
151 conf->indent_list = 24 * 4096;
152 conf->indent_quote = 18 * 4096;
153 conf->base_leading = 4096;
154 conf->base_para_spacing = 10 * 4096;
155 conf->chapter_top_space = 72 * 4096;
156 conf->sect_num_left_space = 12 * 4096;
157 conf->chapter_underline_depth = 14 * 4096;
158 conf->chapter_underline_thickness = 3 * 4096;
159 conf->rule_thickness = 1 * 4096;
160 conf->base_font_size = 12;
161 conf->contents_indent_step = 24 * 4096;
162 conf->contents_margin = 84 * 4096;
163 conf->leader_separation = 12 * 4096;
164 conf->index_gutter = 36 * 4096;
165 conf->index_cols = 2;
166 conf->index_minsep = 18 * 4096;
167
168 conf->base_width =
169 conf->paper_width - conf->left_margin - conf->right_margin;
170 conf->page_height =
171 conf->paper_height - conf->top_margin - conf->bottom_margin;
172 conf->index_colwidth =
173 (conf->base_width - (conf->index_cols-1) * conf->index_gutter)
174 / conf->index_cols;
175
176 IGNORE(idx); /* FIXME */
177
178 /*
179 * First, set up some font structures.
180 */
181 fontlist = mknew(font_list);
182 fontlist->head = fontlist->tail = NULL;
183 conf->tr = make_std_font(fontlist, "Times-Roman");
184 conf->ti = make_std_font(fontlist, "Times-Italic");
185 conf->hr = make_std_font(fontlist, "Helvetica-Bold");
186 conf->hi = make_std_font(fontlist, "Helvetica-BoldOblique");
187 conf->cr = make_std_font(fontlist, "Courier");
188 conf->co = make_std_font(fontlist, "Courier-Oblique");
189 conf->cb = make_std_font(fontlist, "Courier-Bold");
190
191 /*
192 * Set up a data structure to collect page numbers for each
193 * index entry.
194 */
195 {
196 int i;
197 indexentry *entry;
198
199 has_index = FALSE;
200
201 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
202 paper_idx *pi = mknew(paper_idx);
203
204 has_index = TRUE;
205
206 pi->words = pi->lastword = NULL;
207 pi->lastpage = NULL;
208
209 entry->backend_data = pi;
210 }
211 }
212
213 /*
214 * Format the contents entry for each heading.
215 */
216 {
217 word *contents_title;
218 contents_title = fake_word(L"Contents");
219
220 firstcont = make_para_data(para_UnnumberedChapter, 0, 0, 0,
221 NULL, NULL, contents_title, conf);
222 lastcont = firstcont;
223 lastcont->next = NULL;
224 firstcontline = firstcont->first;
225 lastcontline = lastcont->last;
226 for (p = sourceform; p; p = p->next) {
227 word *words;
228 int indent;
229
230 switch (p->type) {
231 case para_Chapter:
232 case para_Appendix:
233 case para_UnnumberedChapter:
234 case para_Heading:
235 case para_Subsect:
236 switch (p->type) {
237 case para_Chapter:
238 case para_Appendix:
239 words = prepare_contents_title(p->kwtext, L": ", p->words);
240 indent = 0;
241 break;
242 case para_UnnumberedChapter:
243 words = prepare_contents_title(NULL, NULL, p->words);
244 indent = 0;
245 break;
246 case para_Heading:
247 case para_Subsect:
248 words = prepare_contents_title(p->kwtext2, L" ", p->words);
249 indent = (p->aux + 1) * conf->contents_indent_step;
250 break;
251 }
252 pdata = make_para_data(para_Normal, p->aux, indent,
253 conf->contents_margin,
254 NULL, NULL, words, conf);
255 pdata->next = NULL;
256 pdata->contents_entry = p;
257 lastcont->next = pdata;
258 lastcont = pdata;
259
260 /*
261 * Link all contents line structures together into
262 * a big list.
263 */
264 if (pdata->first) {
265 if (lastcontline) {
266 lastcontline->next = pdata->first;
267 pdata->first->prev = lastcontline;
268 } else {
269 firstcontline = pdata->first;
270 pdata->first->prev = NULL;
271 }
272 lastcontline = pdata->last;
273 lastcontline->next = NULL;
274 }
275
276 break;
277 }
278 }
279
280 /*
281 * And one extra one, for the index.
282 */
283 if (has_index) {
284 pdata = make_para_data(para_Normal, 0, 0,
285 conf->contents_margin,
286 NULL, NULL, fake_word(L"Index"), conf);
287 pdata->next = NULL;
288 pdata->contents_entry = &index_placeholder_para;
289 lastcont->next = pdata;
290 lastcont = pdata;
291
292 if (pdata->first) {
293 if (lastcontline) {
294 lastcontline->next = pdata->first;
295 pdata->first->prev = lastcontline;
296 } else {
297 firstcontline = pdata->first;
298 pdata->first->prev = NULL;
299 }
300 lastcontline = pdata->last;
301 lastcontline->next = NULL;
302 }
303 }
304 }
305
306 /*
307 * Do the main paragraph formatting.
308 */
309 indent = 0;
310 used_contents = FALSE;
311 firstline = lastline = NULL;
312 for (p = sourceform; p; p = p->next) {
313 p->private_data = NULL;
314
315 switch (p->type) {
316 /*
317 * These paragraph types are either invisible or don't
318 * define text in the normal sense. Either way, they
319 * don't require wrapping.
320 */
321 case para_IM:
322 case para_BR:
323 case para_Biblio:
324 case para_NotParaType:
325 case para_Config:
326 case para_VersionID:
327 case para_NoCite:
328 break;
329
330 /*
331 * These paragraph types don't require wrapping, but
332 * they do affect the line width to which we wrap the
333 * rest of the paragraphs, so we need to pay attention.
334 */
335 case para_LcontPush:
336 indent += conf->indent_list; break;
337 case para_LcontPop:
338 indent -= conf->indent_list; assert(indent >= 0); break;
339 case para_QuotePush:
340 indent += conf->indent_quote; break;
341 case para_QuotePop:
342 indent -= conf->indent_quote; assert(indent >= 0); break;
343
344 /*
345 * This paragraph type is special. Process it
346 * specially.
347 */
348 case para_Code:
349 pdata = code_paragraph(indent, p->words, conf);
350 p->private_data = pdata;
351 if (pdata->first != pdata->last) {
352 pdata->first->penalty_after += 100000;
353 pdata->last->penalty_before += 100000;
354 }
355 break;
356
357 /*
358 * This paragraph is also special.
359 */
360 case para_Rule:
361 pdata = rule_paragraph(indent, conf);
362 p->private_data = pdata;
363 break;
364
365 /*
366 * All of these paragraph types require wrapping in the
367 * ordinary way. So we must supply a set of fonts, a
368 * line width and auxiliary information (e.g. bullet
369 * text) for each one.
370 */
371 case para_Chapter:
372 case para_Appendix:
373 case para_UnnumberedChapter:
374 case para_Heading:
375 case para_Subsect:
376 case para_Normal:
377 case para_BiblioCited:
378 case para_Bullet:
379 case para_NumberedList:
380 case para_DescribedThing:
381 case para_Description:
382 case para_Copyright:
383 case para_Title:
384 pdata = make_para_data(p->type, p->aux, indent, 0,
385 p->kwtext, p->kwtext2, p->words, conf);
386
387 p->private_data = pdata;
388
389 break;
390 }
391
392 if (p->private_data) {
393 pdata = (para_data *)p->private_data;
394
395 /*
396 * If this is the first non-title heading, we link the
397 * contents section in before it.
398 */
399 if (!used_contents && pdata->outline_level > 0) {
400 used_contents = TRUE;
401 if (lastpara)
402 lastpara->next = firstcont;
403 else
404 firstpara = firstcont;
405 lastpara = lastcont;
406 assert(lastpara->next == NULL);
407
408 if (lastline) {
409 lastline->next = firstcontline;
410 firstcontline->prev = lastline;
411 } else {
412 firstline = firstcontline;
413 firstcontline->prev = NULL;
414 }
415 assert(lastcontline != NULL);
416 lastline = lastcontline;
417 lastline->next = NULL;
418 }
419
420 /*
421 * Link all line structures together into a big list.
422 */
423 if (pdata->first) {
424 if (lastline) {
425 lastline->next = pdata->first;
426 pdata->first->prev = lastline;
427 } else {
428 firstline = pdata->first;
429 pdata->first->prev = NULL;
430 }
431 lastline = pdata->last;
432 lastline->next = NULL;
433 }
434
435 /*
436 * Link all paragraph structures together similarly.
437 */
438 pdata->next = NULL;
439 if (lastpara)
440 lastpara->next = pdata;
441 else
442 firstpara = pdata;
443 lastpara = pdata;
444 }
445 }
446
447 /*
448 * Now we have an enormous linked list of every line of text in
449 * the document. Break it up into pages.
450 */
451 pages = page_breaks(firstline, lastline, conf->page_height, 0, 0);
452
453 /*
454 * Number the pages.
455 */
456 {
457 char buf[40];
458 page_data *page;
459
460 pagenum = 0;
461
462 for (page = pages; page; page = page->next) {
463 sprintf(buf, "%d", ++pagenum);
464 page->number = ufroma_dup(buf);
465 }
466
467 if (has_index) {
468 first_index_page = mknew(page_data);
469 first_index_page->next = first_index_page->prev = NULL;
470 first_index_page->first_line = NULL;
471 first_index_page->last_line = NULL;
472 first_index_page->first_text = first_index_page->last_text = NULL;
473 first_index_page->first_xref = first_index_page->last_xref = NULL;
474 first_index_page->first_rect = first_index_page->last_rect = NULL;
475
476 /* And don't forget the as-yet-uncreated index. */
477 sprintf(buf, "%d", ++pagenum);
478 first_index_page->number = ufroma_dup(buf);
479 }
480 }
481
482 /*
483 * Now we're ready to actually lay out the pages. We do this by
484 * looping over _paragraphs_, since we may need to track cross-
485 * references between lines and even across pages.
486 */
487 for (pdata = firstpara; pdata; pdata = pdata->next)
488 render_para(pdata, conf, keywords, idx,
489 &index_placeholder_para, first_index_page);
490
491 /*
492 * Now we've laid out the main body pages, we should have
493 * acquired a full set of page numbers for the index.
494 */
495 if (has_index) {
496 int i;
497 indexentry *entry;
498 word *index_title;
499 para_data *firstidx, *lastidx;
500 line_data *firstidxline, *lastidxline, *ldata;
501 page_data *ipages, *ipages2, *page;
502
503 /*
504 * Create a set of paragraphs for the index.
505 */
506 index_title = fake_word(L"Index");
507
508 firstidx = make_para_data(para_UnnumberedChapter, 0, 0, 0,
509 NULL, NULL, index_title, conf);
510 lastidx = firstidx;
511 lastidx->next = NULL;
512 firstidxline = firstidx->first;
513 lastidxline = lastidx->last;
514 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
515 paper_idx *pi = (paper_idx *)entry->backend_data;
516 para_data *text, *pages;
517
518 text = make_para_data(para_Normal, 0, 0,
519 conf->base_width - conf->index_colwidth,
520 NULL, NULL, entry->text, conf);
521
522 pages = make_para_data(para_Normal, 0, 0,
523 conf->base_width - conf->index_colwidth,
524 NULL, NULL, pi->words, conf);
525
526 text->justification = LEFT;
527 pages->justification = RIGHT;
528 text->last->space_after = pages->first->space_before =
529 conf->base_leading / 2;
530
531 pages->last->space_after = text->first->space_before =
532 conf->base_leading;
533
534 assert(text->first);
535 assert(pages->first);
536 assert(lastidxline);
537 assert(lastidx);
538
539 /*
540 * If feasible, fold the two halves of the index entry
541 * together.
542 */
543 if (text->last->real_shortfall + pages->first->real_shortfall >
544 conf->index_colwidth + conf->index_minsep) {
545 text->last->space_after = -1;
546 pages->first->space_before = -pages->first->line_height+1;
547 }
548
549 lastidx->next = text;
550 text->next = pages;
551 pages->next = NULL;
552 lastidx = pages;
553
554 /*
555 * Link all index line structures together into
556 * a big list.
557 */
558 text->last->next = pages->first;
559 pages->first->prev = text->last;
560
561 lastidxline->next = text->first;
562 text->first->prev = lastidxline;
563
564 lastidxline = pages->last;
565
566 /*
567 * Breaking an index entry anywhere is so bad that I
568 * think I'm going to forbid it totally.
569 */
570 for (ldata = text->first; ldata && ldata->next;
571 ldata = ldata->next) {
572 ldata->next->space_before += ldata->space_after + 1;
573 ldata->space_after = -1;
574 }
575 }
576
577 /*
578 * Now break the index into pages.
579 */
580 ipages = page_breaks(firstidxline, firstidxline, conf->page_height,
581 0, 0);
582 ipages2 = page_breaks(firstidxline->next, lastidxline,
583 conf->page_height,
584 conf->index_cols,
585 firstidxline->space_before +
586 firstidxline->line_height +
587 firstidxline->space_after);
588
589 /*
590 * This will have put each _column_ of the index on a
591 * separate page, which isn't what we want. Fold the pages
592 * back together.
593 */
594 page = ipages2;
595 while (page) {
596 int i;
597
598 for (i = 1; i < conf->index_cols; i++)
599 if (page->next) {
600 page_data *tpage;
601
602 fold_into_page(page, page->next,
603 i * (conf->index_colwidth +
604 conf->index_gutter));
605 tpage = page->next;
606 page->next = page->next->next;
607 if (page->next)
608 page->next->prev = page;
609 sfree(tpage);
610 }
611
612 page = page->next;
613 }
614 /* Also fold the heading on to the same page as the index items. */
615 fold_into_page(ipages, ipages2, 0);
616 ipages->next = ipages2->next;
617 if (ipages->next)
618 ipages->next->prev = ipages;
619 sfree(ipages2);
620 fold_into_page(first_index_page, ipages, 0);
621 first_index_page->next = ipages->next;
622 if (first_index_page->next)
623 first_index_page->next->prev = first_index_page;
624 sfree(ipages);
625 ipages = first_index_page;
626
627 /*
628 * Number the index pages, except the already-numbered
629 * first one.
630 */
631 for (page = ipages->next; page; page = page->next) {
632 char buf[40];
633 sprintf(buf, "%d", ++pagenum);
634 page->number = ufroma_dup(buf);
635 }
636
637 /*
638 * Render the index pages.
639 */
640 for (pdata = firstidx; pdata; pdata = pdata->next)
641 render_para(pdata, conf, keywords, idx,
642 &index_placeholder_para, first_index_page);
643
644 /*
645 * Link the index page list on to the end of the main page
646 * list.
647 */
648 if (!pages)
649 pages = ipages;
650 else {
651 for (page = pages; page->next; page = page->next);
652 page->next = ipages;
653 }
654
655 /*
656 * Same with the paragraph list, which will cause the index
657 * to be mentioned in the document outline.
658 */
659 if (!firstpara)
660 firstpara = firstidx;
661 else
662 lastpara->next = firstidx;
663 lastpara = lastidx;
664 }
665
666 /*
667 * Start putting together the overall document structure we're
668 * going to return.
669 */
670 doc = mknew(document);
671 doc->fonts = fontlist;
672 doc->pages = pages;
673 doc->paper_width = conf->paper_width;
674 doc->paper_height = conf->paper_height;
675
676 /*
677 * Collect the section heading paragraphs into a document
678 * outline. This is slightly fiddly because the Title paragraph
679 * isn't required to be at the start, although all the others
680 * must be in order.
681 */
682 {
683 int osize = 20;
684
685 doc->outline_elements = mknewa(outline_element, osize);
686 doc->n_outline_elements = 0;
687
688 /* First find the title. */
689 for (pdata = firstpara; pdata; pdata = pdata->next) {
690 if (pdata->outline_level == 0) {
691 doc->outline_elements[0].level = 0;
692 doc->outline_elements[0].pdata = pdata;
693 doc->n_outline_elements++;
694 break;
695 }
696 }
697
698 /* Then collect the rest. */
699 for (pdata = firstpara; pdata; pdata = pdata->next) {
700 if (pdata->outline_level > 0) {
701 if (doc->n_outline_elements >= osize) {
702 osize += 20;
703 doc->outline_elements =
704 resize(doc->outline_elements, osize);
705 }
706
707 doc->outline_elements[doc->n_outline_elements].level =
708 pdata->outline_level;
709 doc->outline_elements[doc->n_outline_elements].pdata = pdata;
710 doc->n_outline_elements++;
711 }
712 }
713 }
714
715 sfree(conf);
716
717 return doc;
718 }
719
720 static para_data *make_para_data(int ptype, int paux, int indent, int rmargin,
721 word *pkwtext, word *pkwtext2, word *pwords,
722 paper_conf *conf)
723 {
724 para_data *pdata;
725 line_data *ldata;
726 int extra_indent, firstline_indent, aux_indent;
727 word *aux, *aux2;
728
729 pdata = mknew(para_data);
730 pdata->outline_level = -1;
731 pdata->outline_title = NULL;
732 pdata->rect_type = RECT_NONE;
733 pdata->contents_entry = NULL;
734 pdata->justification = JUST;
735
736 /*
737 * Choose fonts for this paragraph.
738 *
739 * FIXME: All of this ought to be completely
740 * user-configurable.
741 */
742 switch (ptype) {
743 case para_Title:
744 pdata->fonts[FONT_NORMAL] = conf->hr;
745 pdata->sizes[FONT_NORMAL] = 24;
746 pdata->fonts[FONT_EMPH] = conf->hi;
747 pdata->sizes[FONT_EMPH] = 24;
748 pdata->fonts[FONT_CODE] = conf->cb;
749 pdata->sizes[FONT_CODE] = 24;
750 pdata->outline_level = 0;
751 break;
752
753 case para_Chapter:
754 case para_Appendix:
755 case para_UnnumberedChapter:
756 pdata->fonts[FONT_NORMAL] = conf->hr;
757 pdata->sizes[FONT_NORMAL] = 20;
758 pdata->fonts[FONT_EMPH] = conf->hi;
759 pdata->sizes[FONT_EMPH] = 20;
760 pdata->fonts[FONT_CODE] = conf->cb;
761 pdata->sizes[FONT_CODE] = 20;
762 pdata->outline_level = 1;
763 break;
764
765 case para_Heading:
766 case para_Subsect:
767 pdata->fonts[FONT_NORMAL] = conf->hr;
768 pdata->fonts[FONT_EMPH] = conf->hi;
769 pdata->fonts[FONT_CODE] = conf->cb;
770 pdata->sizes[FONT_NORMAL] =
771 pdata->sizes[FONT_EMPH] =
772 pdata->sizes[FONT_CODE] =
773 (paux == 0 ? 16 : paux == 1 ? 14 : 13);
774 pdata->outline_level = 2 + paux;
775 break;
776
777 case para_Normal:
778 case para_BiblioCited:
779 case para_Bullet:
780 case para_NumberedList:
781 case para_DescribedThing:
782 case para_Description:
783 case para_Copyright:
784 pdata->fonts[FONT_NORMAL] = conf->tr;
785 pdata->sizes[FONT_NORMAL] = 12;
786 pdata->fonts[FONT_EMPH] = conf->ti;
787 pdata->sizes[FONT_EMPH] = 12;
788 pdata->fonts[FONT_CODE] = conf->cr;
789 pdata->sizes[FONT_CODE] = 12;
790 break;
791 }
792
793 /*
794 * Also select an indentation level depending on the
795 * paragraph type (list paragraphs other than
796 * para_DescribedThing need extra indent).
797 *
798 * (FIXME: Perhaps at some point we might even arrange
799 * for the user to be able to request indented first
800 * lines in paragraphs.)
801 */
802 if (ptype == para_Bullet ||
803 ptype == para_NumberedList ||
804 ptype == para_Description) {
805 extra_indent = firstline_indent = conf->indent_list;
806 } else {
807 extra_indent = firstline_indent = 0;
808 }
809
810 /*
811 * Find the auxiliary text for this paragraph.
812 */
813 aux = aux2 = NULL;
814 aux_indent = 0;
815
816 switch (ptype) {
817 case para_Chapter:
818 case para_Appendix:
819 case para_Heading:
820 case para_Subsect:
821 /*
822 * For some heading styles (FIXME: be able to
823 * configure which), the auxiliary text contains
824 * the chapter number and is arranged to be
825 * right-aligned a few points left of the primary
826 * margin. For other styles, the auxiliary text is
827 * the full chapter _name_ and takes up space
828 * within the (wrapped) chapter title, meaning that
829 * we must move the first line indent over to make
830 * space for it.
831 */
832 if (ptype == para_Heading || ptype == para_Subsect) {
833 int len;
834
835 aux = pkwtext2;
836 len = paper_width_simple(pdata, pkwtext2);
837 aux_indent = -len - conf->sect_num_left_space;
838
839 pdata->outline_title =
840 prepare_outline_title(pkwtext2, L" ", pwords);
841 } else {
842 aux = pkwtext;
843 aux2 = fake_word(L": ");
844 aux_indent = 0;
845
846 firstline_indent += paper_width_simple(pdata, aux);
847 firstline_indent += paper_width_simple(pdata, aux2);
848
849 pdata->outline_title =
850 prepare_outline_title(pkwtext, L": ", pwords);
851 }
852 break;
853
854 case para_Bullet:
855 /*
856 * Auxiliary text consisting of a bullet. (FIXME:
857 * configurable bullet.)
858 */
859 aux = fake_word(L"\x2022");
860 aux_indent = indent + conf->indent_list_bullet;
861 break;
862
863 case para_NumberedList:
864 /*
865 * Auxiliary text consisting of the number followed
866 * by a (FIXME: configurable) full stop.
867 */
868 aux = pkwtext;
869 aux2 = fake_word(L".");
870 aux_indent = indent + conf->indent_list_bullet;
871 break;
872
873 case para_BiblioCited:
874 /*
875 * Auxiliary text consisting of the bibliography
876 * reference text, and a trailing space.
877 */
878 aux = pkwtext;
879 aux2 = fake_word(L" ");
880 aux_indent = indent;
881 firstline_indent += paper_width_simple(pdata, aux);
882 firstline_indent += paper_width_simple(pdata, aux2);
883 break;
884 }
885
886 if (pdata->outline_level >= 0 && !pdata->outline_title) {
887 pdata->outline_title =
888 prepare_outline_title(NULL, NULL, pwords);
889 }
890
891 wrap_paragraph(pdata, pwords, conf->base_width - rmargin,
892 indent + firstline_indent,
893 indent + extra_indent);
894
895 pdata->first->aux_text = aux;
896 pdata->first->aux_text_2 = aux2;
897 pdata->first->aux_left_indent = aux_indent;
898
899 /*
900 * Line breaking penalties.
901 */
902 switch (ptype) {
903 case para_Chapter:
904 case para_Appendix:
905 case para_Heading:
906 case para_Subsect:
907 case para_UnnumberedChapter:
908 /*
909 * Fixed and large penalty for breaking straight
910 * after a heading; corresponding bonus for
911 * breaking straight before.
912 */
913 pdata->first->penalty_before = -500000;
914 pdata->last->penalty_after = 500000;
915 for (ldata = pdata->first; ldata; ldata = ldata->next)
916 ldata->penalty_after = 500000;
917 break;
918
919 case para_DescribedThing:
920 /*
921 * This is treated a bit like a small heading:
922 * there's a penalty for breaking after it (i.e.
923 * between it and its description), and a bonus for
924 * breaking before it (actually _between_ list
925 * items).
926 */
927 pdata->first->penalty_before = -200000;
928 pdata->last->penalty_after = 200000;
929 break;
930
931 default:
932 /*
933 * Most paragraph types: widow/orphan control by
934 * discouraging breaking one line from the end of
935 * any paragraph.
936 */
937 if (pdata->first != pdata->last) {
938 pdata->first->penalty_after = 100000;
939 pdata->last->penalty_before = 100000;
940 }
941 break;
942 }
943
944 standard_line_spacing(pdata, conf);
945
946 /*
947 * Some kinds of section heading require a page break before
948 * them and an underline after.
949 */
950 if (ptype == para_Title ||
951 ptype == para_Chapter ||
952 ptype == para_Appendix ||
953 ptype == para_UnnumberedChapter) {
954 pdata->first->page_break = TRUE;
955 pdata->first->space_before = conf->chapter_top_space;
956 pdata->last->space_after +=
957 (conf->chapter_underline_depth +
958 conf->chapter_underline_thickness);
959 pdata->rect_type = RECT_CHAPTER_UNDERLINE;
960 }
961
962 return pdata;
963 }
964
965 static void standard_line_spacing(para_data *pdata, paper_conf *conf)
966 {
967 line_data *ldata;
968
969 /*
970 * Set the line spacing for each line in this paragraph.
971 */
972 for (ldata = pdata->first; ldata; ldata = ldata->next) {
973 if (ldata == pdata->first)
974 ldata->space_before = conf->base_para_spacing / 2;
975 else
976 ldata->space_before = conf->base_leading / 2;
977 if (ldata == pdata->last)
978 ldata->space_after = conf->base_para_spacing / 2;
979 else
980 ldata->space_after = conf->base_leading / 2;
981 ldata->page_break = FALSE;
982 }
983 }
984
985 static font_encoding *new_font_encoding(font_data *font)
986 {
987 font_encoding *fe;
988 int i;
989
990 fe = mknew(font_encoding);
991 fe->next = NULL;
992
993 if (font->list->tail)
994 font->list->tail->next = fe;
995 else
996 font->list->head = fe;
997 font->list->tail = fe;
998
999 fe->font = font;
1000 fe->free_pos = 0x21;
1001
1002 for (i = 0; i < 256; i++) {
1003 fe->vector[i] = NULL;
1004 fe->indices[i] = -1;
1005 fe->to_unicode[i] = 0xFFFF;
1006 }
1007
1008 return fe;
1009 }
1010
1011 static font_data *make_std_font(font_list *fontlist, char const *name)
1012 {
1013 const int *widths;
1014 int nglyphs;
1015 font_data *f;
1016 font_encoding *fe;
1017 int i;
1018
1019 widths = ps_std_font_widths(name);
1020 if (!widths)
1021 return NULL;
1022
1023 for (nglyphs = 0; ps_std_glyphs[nglyphs] != NULL; nglyphs++);
1024
1025 f = mknew(font_data);
1026
1027 f->list = fontlist;
1028 f->name = name;
1029 f->nglyphs = nglyphs;
1030 f->glyphs = ps_std_glyphs;
1031 f->widths = widths;
1032 f->subfont_map = mknewa(subfont_map_entry, nglyphs);
1033
1034 /*
1035 * Our first subfont will contain all of US-ASCII. This isn't
1036 * really necessary - we could just create custom subfonts
1037 * precisely as the whim of render_string dictated - but
1038 * instinct suggests that it might be nice to have the text in
1039 * the output files look _marginally_ recognisable.
1040 */
1041 fe = new_font_encoding(f);
1042 fe->free_pos = 0xA1; /* only the top half is free */
1043 f->latest_subfont = fe;
1044
1045 for (i = 0; i < (int)lenof(f->bmp); i++)
1046 f->bmp[i] = 0xFFFF;
1047
1048 for (i = 0; i < nglyphs; i++) {
1049 wchar_t ucs;
1050 ucs = ps_glyph_to_unicode(f->glyphs[i]);
1051 assert(ucs != 0xFFFF);
1052 f->bmp[ucs] = i;
1053 if (ucs >= 0x20 && ucs <= 0x7E) {
1054 fe->vector[ucs] = f->glyphs[i];
1055 fe->indices[ucs] = i;
1056 fe->to_unicode[ucs] = ucs;
1057 f->subfont_map[i].subfont = fe;
1058 f->subfont_map[i].position = ucs;
1059 } else {
1060 /*
1061 * This character is not yet assigned to a subfont.
1062 */
1063 f->subfont_map[i].subfont = NULL;
1064 f->subfont_map[i].position = 0;
1065 }
1066 }
1067
1068 return f;
1069 }
1070
1071 static int string_width(font_data *font, wchar_t const *string, int *errs)
1072 {
1073 int width = 0;
1074
1075 if (errs)
1076 *errs = 0;
1077
1078 for (; *string; string++) {
1079 int index;
1080
1081 index = font->bmp[(unsigned short)*string];
1082 if (index == 0xFFFF) {
1083 if (errs)
1084 *errs = 1;
1085 } else {
1086 width += font->widths[index];
1087 }
1088 }
1089
1090 return width;
1091 }
1092
1093 static int paper_width_internal(void *vctx, word *word, int *nspaces);
1094
1095 struct paper_width_ctx {
1096 int minspacewidth;
1097 para_data *pdata;
1098 };
1099
1100 static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) {
1101 int w = 0;
1102 while (text && text != end) {
1103 w += paper_width_internal(vctx, text, nspaces);
1104 text = text->next;
1105 }
1106 return w;
1107 }
1108
1109 static int paper_width_internal(void *vctx, word *word, int *nspaces)
1110 {
1111 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
1112 int style, type, findex, width, errs;
1113 wchar_t *str;
1114
1115 switch (word->type) {
1116 case word_HyperLink:
1117 case word_HyperEnd:
1118 case word_UpperXref:
1119 case word_LowerXref:
1120 case word_PageXref:
1121 case word_XrefEnd:
1122 case word_IndexRef:
1123 return 0;
1124 }
1125
1126 style = towordstyle(word->type);
1127 type = removeattr(word->type);
1128
1129 findex = (style == word_Normal ? FONT_NORMAL :
1130 style == word_Emph ? FONT_EMPH :
1131 FONT_CODE);
1132
1133 if (type == word_Normal) {
1134 str = word->text;
1135 } else if (type == word_WhiteSpace) {
1136 if (findex != FONT_CODE) {
1137 if (nspaces)
1138 (*nspaces)++;
1139 return ctx->minspacewidth;
1140 } else
1141 str = L" ";
1142 } else /* if (type == word_Quote) */ {
1143 if (word->aux == quote_Open)
1144 str = L"\x2018"; /* FIXME: configurability! */
1145 else
1146 str = L"\x2019"; /* FIXME: configurability! */
1147 }
1148
1149 width = string_width(ctx->pdata->fonts[findex], str, &errs);
1150
1151 if (errs && word->alt)
1152 return paper_width_list(vctx, word->alt, NULL, nspaces);
1153 else
1154 return ctx->pdata->sizes[findex] * width;
1155 }
1156
1157 static int paper_width(void *vctx, word *word)
1158 {
1159 return paper_width_internal(vctx, word, NULL);
1160 }
1161
1162 static int paper_width_simple(para_data *pdata, word *text)
1163 {
1164 struct paper_width_ctx ctx;
1165
1166 ctx.pdata = pdata;
1167 ctx.minspacewidth =
1168 (pdata->sizes[FONT_NORMAL] *
1169 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
1170
1171 return paper_width_list(&ctx, text, NULL, NULL);
1172 }
1173
1174 static void wrap_paragraph(para_data *pdata, word *words,
1175 int w, int i1, int i2)
1176 {
1177 wrappedline *wrapping, *p;
1178 int spacewidth;
1179 struct paper_width_ctx ctx;
1180 int line_height;
1181
1182 /*
1183 * We're going to need to store the line height in every line
1184 * structure we generate.
1185 */
1186 {
1187 int i;
1188 line_height = 0;
1189 for (i = 0; i < NFONTS; i++)
1190 if (line_height < pdata->sizes[i])
1191 line_height = pdata->sizes[i];
1192 line_height *= 4096;
1193 }
1194
1195 spacewidth = (pdata->sizes[FONT_NORMAL] *
1196 string_width(pdata->fonts[FONT_NORMAL], L" ", NULL));
1197 if (spacewidth == 0) {
1198 /*
1199 * A font without a space?! Disturbing. I hope this never
1200 * comes up, but I'll make a random guess anyway and set my
1201 * space width to half the point size.
1202 */
1203 spacewidth = pdata->sizes[FONT_NORMAL] * 4096 / 2;
1204 }
1205
1206 /*
1207 * I'm going to set the _minimum_ space width to 3/5 of the
1208 * standard one, and use the standard one as the optimum.
1209 */
1210 ctx.minspacewidth = spacewidth * 3 / 5;
1211 ctx.pdata = pdata;
1212
1213 wrapping = wrap_para(words, w - i1, w - i2, paper_width, &ctx, spacewidth);
1214
1215 /*
1216 * Having done the wrapping, we now concoct a set of line_data
1217 * structures.
1218 */
1219 pdata->first = pdata->last = NULL;
1220
1221 for (p = wrapping; p; p = p->next) {
1222 line_data *ldata;
1223 word *wd;
1224 int len, wid, spaces;
1225
1226 ldata = mknew(line_data);
1227
1228 ldata->pdata = pdata;
1229 ldata->first = p->begin;
1230 ldata->end = p->end;
1231 ldata->line_height = line_height;
1232
1233 ldata->xpos = (p == wrapping ? i1 : i2);
1234
1235 if (pdata->last) {
1236 pdata->last->next = ldata;
1237 ldata->prev = pdata->last;
1238 } else {
1239 pdata->first = ldata;
1240 ldata->prev = NULL;
1241 }
1242 ldata->next = NULL;
1243 pdata->last = ldata;
1244
1245 spaces = 0;
1246 len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces);
1247 wid = (p == wrapping ? w - i1 : w - i2);
1248 wd = ldata->first;
1249
1250 ldata->hshortfall = wid - len;
1251 ldata->nspaces = spaces;
1252 /*
1253 * This tells us how much the space width needs to
1254 * change from _min_spacewidth. But we want to store
1255 * its difference from the _natural_ space width, to
1256 * make the text rendering easier.
1257 */
1258 ldata->hshortfall += ctx.minspacewidth * spaces;
1259 ldata->hshortfall -= spacewidth * spaces;
1260 ldata->real_shortfall = ldata->hshortfall;
1261 /*
1262 * Special case: on the last line of a paragraph, we
1263 * never stretch spaces.
1264 */
1265 if (ldata->hshortfall > 0 && !p->next)
1266 ldata->hshortfall = 0;
1267
1268 ldata->aux_text = NULL;
1269 ldata->aux_text_2 = NULL;
1270 ldata->aux_left_indent = 0;
1271 ldata->penalty_before = ldata->penalty_after = 0;
1272 }
1273
1274 }
1275
1276 static page_data *page_breaks(line_data *first, line_data *last,
1277 int page_height, int ncols, int headspace)
1278 {
1279 line_data *l, *m;
1280 page_data *ph, *pt;
1281 int n, n1, this_height;
1282
1283 /*
1284 * Page breaking is done by a close analogue of the optimal
1285 * paragraph wrapping algorithm used by wrap_para(). We work
1286 * backwards from the end of the document line by line; for
1287 * each line, we contemplate every possible number of lines we
1288 * could put on a page starting with that line, determine a
1289 * cost function for each one, add it to the pre-computed cost
1290 * function for optimally page-breaking everything after that
1291 * page, and pick the best option.
1292 *
1293 * This is made slightly more complex by the fact that we have
1294 * a multi-column index with a heading at the top of the
1295 * _first_ page, meaning that the first _ncols_ pages must have
1296 * a different length. Hence, we must do the wrapping ncols+1
1297 * times over, hypothetically trying to put every subsequence
1298 * on every possible page.
1299 *
1300 * Since my line_data structures are only used for this
1301 * purpose, I might as well just store the algorithm data
1302 * directly in them.
1303 */
1304
1305 for (l = last; l; l = l->prev) {
1306 l->bestcost = mknewa(int, ncols+1);
1307 l->vshortfall = mknewa(int, ncols+1);
1308 l->text = mknewa(int, ncols+1);
1309 l->space = mknewa(int, ncols+1);
1310 l->page_last = mknewa(line_data *, ncols+1);
1311
1312 for (n = 0; n <= ncols; n++) {
1313 int minheight, text = 0, space = 0;
1314 int cost;
1315
1316 n1 = (n < ncols ? n+1 : ncols);
1317 if (n < ncols)
1318 this_height = page_height - headspace;
1319 else
1320 this_height = page_height;
1321
1322 l->bestcost[n] = -1;
1323 for (m = l; m; m = m->next) {
1324 if (m != l && m->page_break)
1325 break; /* we've gone as far as we can */
1326
1327 if (m != l) {
1328 if (m->prev->space_after > 0)
1329 space += m->prev->space_after;
1330 else
1331 text += m->prev->space_after;
1332 }
1333 if (m != l || m->page_break) {
1334 if (m->space_before > 0)
1335 space += m->space_before;
1336 else
1337 text += m->space_before;
1338 }
1339 text += m->line_height;
1340 minheight = text + space;
1341
1342 if (m != l && minheight > this_height)
1343 break;
1344
1345 /*
1346 * If the space after this paragraph is _negative_
1347 * (which means the next line is folded on to this
1348 * one, which happens in the index), we absolutely
1349 * cannot break here.
1350 */
1351 if (m->space_after >= 0) {
1352
1353 /*
1354 * Compute the cost of this arrangement, as the
1355 * square of the amount of wasted space on the
1356 * page. Exception: if this is the last page
1357 * before a mandatory break or the document
1358 * end, we don't penalise a large blank area.
1359 */
1360 if (m != last && m->next && !m->next->page_break)
1361 {
1362 int x = this_height - minheight;
1363 int xf;
1364
1365 xf = x & 0xFF;
1366 x >>= 8;
1367
1368 cost = x*x;
1369 cost += (x * xf) >> 8;
1370 } else
1371 cost = 0;
1372
1373 if (m != last && m->next && !m->next->page_break) {
1374 cost += m->penalty_after;
1375 cost += m->next->penalty_before;
1376 }
1377
1378 if (m != last && m->next && !m->next->page_break)
1379 cost += m->next->bestcost[n1];
1380 if (l->bestcost[n] == -1 || l->bestcost[n] > cost) {
1381 /*
1382 * This is the best option yet for this
1383 * starting point.
1384 */
1385 l->bestcost[n] = cost;
1386 if (m != last && m->next && !m->next->page_break)
1387 l->vshortfall[n] = this_height - minheight;
1388 else
1389 l->vshortfall[n] = 0;
1390 l->text[n] = text;
1391 l->space[n] = space;
1392 l->page_last[n] = m;
1393 }
1394 }
1395
1396 if (m == last)
1397 break;
1398 }
1399 }
1400 }
1401
1402 /*
1403 * Now go through the line list forwards and assemble the
1404 * actual pages.
1405 */
1406 ph = pt = NULL;
1407
1408 l = first;
1409 n = 0;
1410 while (l) {
1411 page_data *page;
1412 int text, space, head;
1413
1414 page = mknew(page_data);
1415 page->next = NULL;
1416 page->prev = pt;
1417 if (pt)
1418 pt->next = page;
1419 else
1420 ph = page;
1421 pt = page;
1422
1423 page->first_line = l;
1424 page->last_line = l->page_last[n];
1425
1426 page->first_text = page->last_text = NULL;
1427 page->first_xref = page->last_xref = NULL;
1428 page->first_rect = page->last_rect = NULL;
1429
1430 /*
1431 * Now assign a y-coordinate to each line on the page.
1432 */
1433 text = space = 0;
1434 head = (n < ncols ? headspace : 0);
1435 for (l = page->first_line; l; l = l->next) {
1436 if (l != page->first_line) {
1437 if (l->prev->space_after > 0)
1438 space += l->prev->space_after;
1439 else
1440 text += l->prev->space_after;
1441 }
1442 if (l != page->first_line || l->page_break) {
1443 if (l->space_before > 0)
1444 space += l->space_before;
1445 else
1446 text += l->space_before;
1447 }
1448 text += l->line_height;
1449
1450 l->page = page;
1451 l->ypos = text + space + head +
1452 space * (float)page->first_line->vshortfall[n] /
1453 page->first_line->space[n];
1454
1455 if (l == page->last_line)
1456 break;
1457 }
1458
1459 l = page->last_line;
1460 if (l == last)
1461 break;
1462 l = l->next;
1463
1464 n = (n < ncols ? n+1 : ncols);
1465 }
1466
1467 return ph;
1468 }
1469
1470 static void add_rect_to_page(page_data *page, int x, int y, int w, int h)
1471 {
1472 rect *r = mknew(rect);
1473
1474 r->next = NULL;
1475 if (page->last_rect)
1476 page->last_rect->next = r;
1477 else
1478 page->first_rect = r;
1479 page->last_rect = r;
1480
1481 r->x = x;
1482 r->y = y;
1483 r->w = w;
1484 r->h = h;
1485 }
1486
1487 static void add_string_to_page(page_data *page, int x, int y,
1488 font_encoding *fe, int size, char *text)
1489 {
1490 text_fragment *frag;
1491
1492 frag = mknew(text_fragment);
1493 frag->next = NULL;
1494
1495 if (page->last_text)
1496 page->last_text->next = frag;
1497 else
1498 page->first_text = frag;
1499 page->last_text = frag;
1500
1501 frag->x = x;
1502 frag->y = y;
1503 frag->fe = fe;
1504 frag->fontsize = size;
1505 frag->text = dupstr(text);
1506 }
1507
1508 /*
1509 * Returns the updated x coordinate.
1510 */
1511 static int render_string(page_data *page, font_data *font, int fontsize,
1512 int x, int y, wchar_t *str)
1513 {
1514 char *text;
1515 int textpos, textwid, glyph;
1516 font_encoding *subfont = NULL, *sf;
1517
1518 text = mknewa(char, 1 + ustrlen(str));
1519 textpos = textwid = 0;
1520
1521 while (*str) {
1522 glyph = font->bmp[*str];
1523
1524 if (glyph == 0xFFFF)
1525 continue; /* nothing more we can do here */
1526
1527 /*
1528 * Find which subfont this character is going in.
1529 */
1530 sf = font->subfont_map[glyph].subfont;
1531
1532 if (!sf) {
1533 int c;
1534
1535 /*
1536 * This character is not yet in a subfont. Assign one.
1537 */
1538 if (font->latest_subfont->free_pos >= 0x100)
1539 font->latest_subfont = new_font_encoding(font);
1540
1541 c = font->latest_subfont->free_pos++;
1542 if (font->latest_subfont->free_pos == 0x7F)
1543 font->latest_subfont->free_pos = 0xA1;
1544
1545 font->subfont_map[glyph].subfont = font->latest_subfont;
1546 font->subfont_map[glyph].position = c;
1547 font->latest_subfont->vector[c] = font->glyphs[glyph];
1548 font->latest_subfont->indices[c] = glyph;
1549 font->latest_subfont->to_unicode[c] = *str;
1550
1551 sf = font->latest_subfont;
1552 }
1553
1554 if (!subfont || sf != subfont) {
1555 if (subfont) {
1556 text[textpos] = '\0';
1557 add_string_to_page(page, x, y, subfont, fontsize, text);
1558 x += textwid;
1559 } else {
1560 assert(textpos == 0);
1561 }
1562 textpos = 0;
1563 subfont = sf;
1564 }
1565
1566 text[textpos++] = font->subfont_map[glyph].position;
1567 textwid += font->widths[glyph] * fontsize;
1568
1569 str++;
1570 }
1571
1572 if (textpos > 0) {
1573 text[textpos] = '\0';
1574 add_string_to_page(page, x, y, subfont, fontsize, text);
1575 x += textwid;
1576 }
1577
1578 return x;
1579 }
1580
1581 /*
1582 * Returns the updated x coordinate.
1583 */
1584 static int render_text(page_data *page, para_data *pdata, line_data *ldata,
1585 int x, int y, word *text, word *text_end, xref **xr,
1586 int shortfall, int nspaces, int *nspace,
1587 keywordlist *keywords, indexdata *idx)
1588 {
1589 while (text && text != text_end) {
1590 int style, type, findex, errs;
1591 wchar_t *str;
1592 xref_dest dest;
1593
1594 switch (text->type) {
1595 /*
1596 * Start a cross-reference.
1597 */
1598 case word_HyperLink:
1599 case word_UpperXref:
1600 case word_LowerXref:
1601 case word_PageXref:
1602
1603 if (text->type == word_HyperLink) {
1604 dest.type = URL;
1605 dest.url = utoa_dup(text->text);
1606 dest.page = NULL;
1607 } else if (text->type == word_PageXref) {
1608 dest.type = PAGE;
1609 dest.url = NULL;
1610 dest.page = (page_data *)text->private_data;
1611 } else {
1612 keyword *kwl = kw_lookup(keywords, text->text);
1613 para_data *pdata;
1614
1615 if (kwl) {
1616 assert(kwl->para->private_data);
1617 pdata = (para_data *) kwl->para->private_data;
1618 dest.type = PAGE;
1619 dest.page = pdata->first->page;
1620 dest.url = NULL;
1621 } else {
1622 /*
1623 * Shouldn't happen, but *shrug*
1624 */
1625 dest.type = NONE;
1626 dest.page = NULL;
1627 dest.url = NULL;
1628 }
1629 }
1630 if (dest.type != NONE) {
1631 *xr = mknew(xref);
1632 (*xr)->dest = dest; /* structure copy */
1633 if (page->last_xref)
1634 page->last_xref->next = *xr;
1635 else
1636 page->first_xref = *xr;
1637 page->last_xref = *xr;
1638 (*xr)->next = NULL;
1639
1640 /*
1641 * FIXME: Ideally we should have, and use, some
1642 * vertical font metric information here so that
1643 * our cross-ref rectangle can take account of
1644 * descenders and the font's cap height. This will
1645 * do for the moment, but it isn't ideal.
1646 */
1647 (*xr)->lx = (*xr)->rx = x;
1648 (*xr)->by = y;
1649 (*xr)->ty = y + ldata->line_height;
1650 }
1651 goto nextword;
1652
1653 /*
1654 * Finish extending a cross-reference box.
1655 */
1656 case word_HyperEnd:
1657 case word_XrefEnd:
1658 *xr = NULL;
1659 goto nextword;
1660
1661 /*
1662 * Add the current page number to the list of pages
1663 * referenced by an index entry.
1664 */
1665 case word_IndexRef:
1666 {
1667 indextag *tag;
1668 int i;
1669
1670 tag = index_findtag(idx, text->text);
1671 if (!tag)
1672 goto nextword;
1673
1674 for (i = 0; i < tag->nrefs; i++) {
1675 indexentry *entry = tag->refs[i];
1676 paper_idx *pi = (paper_idx *)entry->backend_data;
1677
1678 /*
1679 * If the same index term is indexed twice
1680 * within the same section, we only want to
1681 * mention it once in the index.
1682 */
1683 if (pi->lastpage != page) {
1684 word **wp;
1685
1686 if (pi->lastword) {
1687 pi->lastword = pi->lastword->next =
1688 fake_word(L",");
1689 pi->lastword = pi->lastword->next =
1690 fake_space_word();
1691 wp = &pi->lastword->next;
1692 } else
1693 wp = &pi->words;
1694
1695 pi->lastword = *wp =
1696 fake_page_ref(page);
1697 pi->lastword = pi->lastword->next =
1698 fake_word(page->number);
1699 pi->lastword = pi->lastword->next =
1700 fake_end_ref();
1701 }
1702
1703 pi->lastpage = page;
1704 }
1705 }
1706 goto nextword;
1707 }
1708
1709 style = towordstyle(text->type);
1710 type = removeattr(text->type);
1711
1712 findex = (style == word_Normal ? FONT_NORMAL :
1713 style == word_Emph ? FONT_EMPH :
1714 FONT_CODE);
1715
1716 if (type == word_Normal) {
1717 str = text->text;
1718 } else if (type == word_WhiteSpace) {
1719 x += pdata->sizes[findex] *
1720 string_width(pdata->fonts[findex], L" ", NULL);
1721 if (nspaces && findex != FONT_CODE) {
1722 x += (*nspace+1) * shortfall / nspaces;
1723 x -= *nspace * shortfall / nspaces;
1724 (*nspace)++;
1725 }
1726 goto nextword;
1727 } else /* if (type == word_Quote) */ {
1728 if (text->aux == quote_Open)
1729 str = L"\x2018"; /* FIXME: configurability! */
1730 else
1731 str = L"\x2019"; /* FIXME: configurability! */
1732 }
1733
1734 (void) string_width(pdata->fonts[findex], str, &errs);
1735
1736 if (errs && text->alt)
1737 x = render_text(page, pdata, ldata, x, y, text->alt, NULL,
1738 xr, shortfall, nspaces, nspace, keywords, idx);
1739 else
1740 x = render_string(page, pdata->fonts[findex],
1741 pdata->sizes[findex], x, y, str);
1742
1743 if (*xr)
1744 (*xr)->rx = x;
1745
1746 nextword:
1747 text = text->next;
1748 }
1749
1750 return x;
1751 }
1752
1753 /*
1754 * Returns the last x position used on the line.
1755 */
1756 static int render_line(line_data *ldata, int left_x, int top_y,
1757 xref_dest *dest, keywordlist *keywords, indexdata *idx)
1758 {
1759 int nspace;
1760 xref *xr;
1761 int ret = 0;
1762
1763 if (ldata->aux_text) {
1764 int x;
1765 xr = NULL;
1766 nspace = 0;
1767 x = render_text(ldata->page, ldata->pdata, ldata,
1768 left_x + ldata->aux_left_indent,
1769 top_y - ldata->ypos,
1770 ldata->aux_text, NULL, &xr, 0, 0, &nspace,
1771 keywords, idx);
1772 if (ldata->aux_text_2)
1773 render_text(ldata->page, ldata->pdata, ldata,
1774 x, top_y - ldata->ypos,
1775 ldata->aux_text_2, NULL, &xr, 0, 0, &nspace,
1776 keywords, idx);
1777 }
1778 nspace = 0;
1779
1780 if (ldata->first) {
1781 /*
1782 * There might be a cross-reference carried over from a
1783 * previous line.
1784 */
1785 if (dest->type != NONE) {
1786 xr = mknew(xref);
1787 xr->next = NULL;
1788 xr->dest = *dest; /* structure copy */
1789 if (ldata->page->last_xref)
1790 ldata->page->last_xref->next = xr;
1791 else
1792 ldata->page->first_xref = xr;
1793 ldata->page->last_xref = xr;
1794 xr->lx = xr->rx = left_x + ldata->xpos;
1795 xr->by = top_y - ldata->ypos;
1796 xr->ty = top_y - ldata->ypos + ldata->line_height;
1797 } else
1798 xr = NULL;
1799
1800 {
1801 int extra_indent, shortfall, spaces;
1802 int just = ldata->pdata->justification;
1803
1804 /*
1805 * All forms of justification become JUST when we have
1806 * to squeeze the paragraph.
1807 */
1808 if (ldata->hshortfall < 0)
1809 just = JUST;
1810
1811 switch (just) {
1812 case JUST:
1813 shortfall = ldata->hshortfall;
1814 spaces = ldata->nspaces;
1815 extra_indent = 0;
1816 break;
1817 case LEFT:
1818 shortfall = spaces = extra_indent = 0;
1819 break;
1820 case RIGHT:
1821 shortfall = spaces = 0;
1822 extra_indent = ldata->real_shortfall;
1823 break;
1824 }
1825
1826 ret = render_text(ldata->page, ldata->pdata, ldata,
1827 left_x + ldata->xpos + extra_indent,
1828 top_y - ldata->ypos, ldata->first, ldata->end,
1829 &xr, shortfall, spaces, &nspace,
1830 keywords, idx);
1831 }
1832
1833 if (xr) {
1834 /*
1835 * There's a cross-reference continued on to the next line.
1836 */
1837 *dest = xr->dest;
1838 } else
1839 dest->type = NONE;
1840 }
1841
1842 return ret;
1843 }
1844
1845 static void render_para(para_data *pdata, paper_conf *conf,
1846 keywordlist *keywords, indexdata *idx,
1847 paragraph *index_placeholder, page_data *index_page)
1848 {
1849 int last_x;
1850 xref *cxref;
1851 page_data *cxref_page;
1852 xref_dest dest;
1853 para_data *target;
1854 line_data *ldata;
1855
1856 dest.type = NONE;
1857 cxref = NULL;
1858 cxref_page = NULL;
1859
1860 for (ldata = pdata->first; ldata; ldata = ldata->next) {
1861 /*
1862 * If this is a contents entry, we expect to have a single
1863 * enormous cross-reference rectangle covering the whole
1864 * thing. (Unless, of course, it spans multiple pages.)
1865 */
1866 if (pdata->contents_entry && ldata->page != cxref_page) {
1867 cxref_page = ldata->page;
1868 cxref = mknew(xref);
1869 cxref->next = NULL;
1870 cxref->dest.type = PAGE;
1871 if (pdata->contents_entry == index_placeholder) {
1872 cxref->dest.page = index_page;
1873 } else {
1874 assert(pdata->contents_entry->private_data);
1875 target = (para_data *)pdata->contents_entry->private_data;
1876 cxref->dest.page = target->first->page;
1877 }
1878 cxref->dest.url = NULL;
1879 if (ldata->page->last_xref)
1880 ldata->page->last_xref->next = cxref;
1881 else
1882 ldata->page->first_xref = cxref;
1883 ldata->page->last_xref = cxref;
1884 cxref->lx = conf->left_margin;
1885 cxref->rx = conf->paper_width - conf->right_margin;
1886 cxref->ty = conf->paper_height - conf->top_margin
1887 - ldata->ypos + ldata->line_height;
1888 }
1889 if (pdata->contents_entry) {
1890 assert(cxref != NULL);
1891 cxref->by = conf->paper_height - conf->top_margin
1892 - ldata->ypos;
1893 }
1894
1895 last_x = render_line(ldata, conf->left_margin,
1896 conf->paper_height - conf->top_margin,
1897 &dest, keywords, idx);
1898 if (ldata == pdata->last)
1899 break;
1900 }
1901
1902 /*
1903 * If this is a contents entry, add leaders and a page
1904 * number.
1905 */
1906 if (pdata->contents_entry) {
1907 word *w;
1908 wchar_t *num;
1909 int wid;
1910 int x;
1911
1912 if (pdata->contents_entry == index_placeholder) {
1913 num = index_page->number;
1914 } else {
1915 assert(pdata->contents_entry->private_data);
1916 target = (para_data *)pdata->contents_entry->private_data;
1917 num = target->first->page->number;
1918 }
1919
1920 w = fake_word(num);
1921 wid = paper_width_simple(pdata, w);
1922 sfree(w);
1923
1924 render_string(pdata->last->page,
1925 pdata->fonts[FONT_NORMAL],
1926 pdata->sizes[FONT_NORMAL],
1927 conf->paper_width - conf->right_margin - wid,
1928 (conf->paper_height - conf->top_margin -
1929 pdata->last->ypos), num);
1930
1931 for (x = 0; x < conf->base_width; x += conf->leader_separation)
1932 if (x - conf->leader_separation > last_x - conf->left_margin &&
1933 x + conf->leader_separation < conf->base_width - wid)
1934 render_string(pdata->last->page,
1935 pdata->fonts[FONT_NORMAL],
1936 pdata->sizes[FONT_NORMAL],
1937 conf->left_margin + x,
1938 (conf->paper_height - conf->top_margin -
1939 pdata->last->ypos), L".");
1940 }
1941
1942 /*
1943 * Render any rectangle (chapter title underline or rule)
1944 * that goes with this paragraph.
1945 */
1946 switch (pdata->rect_type) {
1947 case RECT_CHAPTER_UNDERLINE:
1948 add_rect_to_page(pdata->last->page,
1949 conf->left_margin,
1950 (conf->paper_height - conf->top_margin -
1951 pdata->last->ypos -
1952 conf->chapter_underline_depth),
1953 conf->base_width,
1954 conf->chapter_underline_thickness);
1955 break;
1956 case RECT_RULE:
1957 add_rect_to_page(pdata->first->page,
1958 conf->left_margin + pdata->first->xpos,
1959 (conf->paper_height - conf->top_margin -
1960 pdata->last->ypos -
1961 pdata->last->line_height),
1962 conf->base_width - pdata->first->xpos,
1963 pdata->last->line_height);
1964 break;
1965 default: /* placate gcc */
1966 break;
1967 }
1968 }
1969
1970 static para_data *code_paragraph(int indent, word *words, paper_conf *conf)
1971 {
1972 para_data *pdata = mknew(para_data);
1973
1974 /*
1975 * For code paragraphs, I'm going to hack grievously and
1976 * pretend the three normal fonts are the three code paragraph
1977 * fonts.
1978 */
1979 pdata->fonts[FONT_NORMAL] = conf->cb;
1980 pdata->fonts[FONT_EMPH] = conf->co;
1981 pdata->fonts[FONT_CODE] = conf->cr;
1982 pdata->sizes[FONT_NORMAL] =
1983 pdata->sizes[FONT_EMPH] =
1984 pdata->sizes[FONT_CODE] = 12;
1985
1986 pdata->first = pdata->last = NULL;
1987 pdata->outline_level = -1;
1988 pdata->rect_type = RECT_NONE;
1989 pdata->contents_entry = NULL;
1990 pdata->justification = LEFT;
1991
1992 for (; words; words = words->next) {
1993 wchar_t *t, *e, *start;
1994 word *lhead = NULL, *ltail = NULL, *w;
1995 line_data *ldata;
1996 int prev = -1, curr;
1997
1998 t = words->text;
1999 if (words->next && words->next->type == word_Emph) {
2000 e = words->next->text;
2001 words = words->next;
2002 } else
2003 e = NULL;
2004
2005 start = t;
2006
2007 while (*start) {
2008 while (*t) {
2009 if (!e || !*e)
2010 curr = 0;
2011 else if (*e == L'i')
2012 curr = 1;
2013 else if (*e == L'b')
2014 curr = 2;
2015 else
2016 curr = 0;
2017
2018 if (prev < 0)
2019 prev = curr;
2020
2021 if (curr != prev)
2022 break;
2023
2024 t++;
2025 if (e && *e)
2026 e++;
2027 }
2028
2029 /*
2030 * We've isolated a maximal subsequence of the line
2031 * which has the same emphasis. Form it into a word
2032 * structure.
2033 */
2034 w = mknew(word);
2035 w->next = NULL;
2036 w->alt = NULL;
2037 w->type = (prev == 0 ? word_WeakCode :
2038 prev == 1 ? word_Emph : word_Normal);
2039 w->text = mknewa(wchar_t, t-start+1);
2040 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
2041 w->text[t-start] = '\0';
2042 w->breaks = FALSE;
2043
2044 if (ltail)
2045 ltail->next = w;
2046 else
2047 lhead = w;
2048 ltail = w;
2049
2050 start = t;
2051 prev = -1;
2052 }
2053
2054 ldata = mknew(line_data);
2055
2056 ldata->pdata = pdata;
2057 ldata->first = lhead;
2058 ldata->end = NULL;
2059 ldata->line_height = conf->base_font_size * 4096;
2060
2061 ldata->xpos = indent;
2062
2063 if (pdata->last) {
2064 pdata->last->next = ldata;
2065 ldata->prev = pdata->last;
2066 } else {
2067 pdata->first = ldata;
2068 ldata->prev = NULL;
2069 }
2070 ldata->next = NULL;
2071 pdata->last = ldata;
2072
2073 ldata->hshortfall = 0;
2074 ldata->nspaces = 0;
2075 ldata->aux_text = NULL;
2076 ldata->aux_text_2 = NULL;
2077 ldata->aux_left_indent = 0;
2078 /* General opprobrium for breaking in a code paragraph. */
2079 ldata->penalty_before = ldata->penalty_after = 50000;
2080 }
2081
2082 standard_line_spacing(pdata, conf);
2083
2084 return pdata;
2085 }
2086
2087 static para_data *rule_paragraph(int indent, paper_conf *conf)
2088 {
2089 para_data *pdata = mknew(para_data);
2090 line_data *ldata;
2091
2092 ldata = mknew(line_data);
2093
2094 ldata->pdata = pdata;
2095 ldata->first = NULL;
2096 ldata->end = NULL;
2097 ldata->line_height = conf->rule_thickness;
2098
2099 ldata->xpos = indent;
2100
2101 ldata->prev = NULL;
2102 ldata->next = NULL;
2103
2104 ldata->hshortfall = 0;
2105 ldata->nspaces = 0;
2106 ldata->aux_text = NULL;
2107 ldata->aux_text_2 = NULL;
2108 ldata->aux_left_indent = 0;
2109
2110 /*
2111 * Better to break after a rule than before it
2112 */
2113 ldata->penalty_after += 100000;
2114 ldata->penalty_before += -100000;
2115
2116 pdata->first = pdata->last = ldata;
2117 pdata->outline_level = -1;
2118 pdata->rect_type = RECT_RULE;
2119 pdata->contents_entry = NULL;
2120 pdata->justification = LEFT;
2121
2122 standard_line_spacing(pdata, conf);
2123
2124 return pdata;
2125 }
2126
2127 /*
2128 * Plain-text-like formatting for outline titles.
2129 */
2130 static void paper_rdaddw(rdstring *rs, word *text) {
2131 for (; text; text = text->next) switch (text->type) {
2132 case word_HyperLink:
2133 case word_HyperEnd:
2134 case word_UpperXref:
2135 case word_LowerXref:
2136 case word_XrefEnd:
2137 case word_IndexRef:
2138 break;
2139
2140 case word_Normal:
2141 case word_Emph:
2142 case word_Code:
2143 case word_WeakCode:
2144 case word_WhiteSpace:
2145 case word_EmphSpace:
2146 case word_CodeSpace:
2147 case word_WkCodeSpace:
2148 case word_Quote:
2149 case word_EmphQuote:
2150 case word_CodeQuote:
2151 case word_WkCodeQuote:
2152 assert(text->type != word_CodeQuote &&
2153 text->type != word_WkCodeQuote);
2154 if (towordstyle(text->type) == word_Emph &&
2155 (attraux(text->aux) == attr_First ||
2156 attraux(text->aux) == attr_Only))
2157 rdadd(rs, L'_'); /* FIXME: configurability */
2158 else if (towordstyle(text->type) == word_Code &&
2159 (attraux(text->aux) == attr_First ||
2160 attraux(text->aux) == attr_Only))
2161 rdadd(rs, L'\''); /* FIXME: configurability */
2162 if (removeattr(text->type) == word_Normal) {
2163 rdadds(rs, text->text);
2164 } else if (removeattr(text->type) == word_WhiteSpace) {
2165 rdadd(rs, L' ');
2166 } else if (removeattr(text->type) == word_Quote) {
2167 rdadd(rs, L'\''); /* fixme: configurability */
2168 }
2169 if (towordstyle(text->type) == word_Emph &&
2170 (attraux(text->aux) == attr_Last ||
2171 attraux(text->aux) == attr_Only))
2172 rdadd(rs, L'_'); /* FIXME: configurability */
2173 else if (towordstyle(text->type) == word_Code &&
2174 (attraux(text->aux) == attr_Last ||
2175 attraux(text->aux) == attr_Only))
2176 rdadd(rs, L'\''); /* FIXME: configurability */
2177 break;
2178 }
2179 }
2180
2181 static wchar_t *prepare_outline_title(word *first, wchar_t *separator,
2182 word *second)
2183 {
2184 rdstring rs = {0, 0, NULL};
2185
2186 if (first)
2187 paper_rdaddw(&rs, first);
2188 if (separator)
2189 rdadds(&rs, separator);
2190 if (second)
2191 paper_rdaddw(&rs, second);
2192
2193 return rs.text;
2194 }
2195
2196 static word *fake_word(wchar_t *text)
2197 {
2198 word *ret = mknew(word);
2199 ret->next = NULL;
2200 ret->alt = NULL;
2201 ret->type = word_Normal;
2202 ret->text = ustrdup(text);
2203 ret->breaks = FALSE;
2204 ret->aux = 0;
2205 return ret;
2206 }
2207
2208 static word *fake_space_word(void)
2209 {
2210 word *ret = mknew(word);
2211 ret->next = NULL;
2212 ret->alt = NULL;
2213 ret->type = word_WhiteSpace;
2214 ret->text = NULL;
2215 ret->breaks = TRUE;
2216 ret->aux = 0;
2217 return ret;
2218 }
2219
2220 static word *fake_page_ref(page_data *page)
2221 {
2222 word *ret = mknew(word);
2223 ret->next = NULL;
2224 ret->alt = NULL;
2225 ret->type = word_PageXref;
2226 ret->text = NULL;
2227 ret->breaks = FALSE;
2228 ret->aux = 0;
2229 ret->private_data = page;
2230 return ret;
2231 }
2232
2233 static word *fake_end_ref(void)
2234 {
2235 word *ret = mknew(word);
2236 ret->next = NULL;
2237 ret->alt = NULL;
2238 ret->type = word_XrefEnd;
2239 ret->text = NULL;
2240 ret->breaks = FALSE;
2241 ret->aux = 0;
2242 return ret;
2243 }
2244
2245 static word *prepare_contents_title(word *first, wchar_t *separator,
2246 word *second)
2247 {
2248 word *ret;
2249 word **wptr, *w;
2250
2251 wptr = &ret;
2252
2253 if (first) {
2254 w = dup_word_list(first);
2255 *wptr = w;
2256 while (w->next)
2257 w = w->next;
2258 wptr = &w->next;
2259 }
2260
2261 if (separator) {
2262 w = fake_word(separator);
2263 *wptr = w;
2264 wptr = &w->next;
2265 }
2266
2267 if (second) {
2268 *wptr = dup_word_list(second);
2269 }
2270
2271 return ret;
2272 }
2273
2274 static void fold_into_page(page_data *dest, page_data *src, int right_shift)
2275 {
2276 line_data *ldata;
2277
2278 if (!src->first_line)
2279 return;
2280
2281 if (dest->last_line) {
2282 dest->last_line->next = src->first_line;
2283 src->first_line->prev = dest->last_line;
2284 }
2285 dest->last_line = src->last_line;
2286
2287 for (ldata = src->first_line; ldata; ldata = ldata->next) {
2288 ldata->page = dest;
2289 ldata->xpos += right_shift;
2290
2291 if (ldata == src->last_line)
2292 break;
2293 }
2294 }