X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/4334192268c4b1c0c27a91d043792a21bd8d1292..3e030063e831da0af7bdb8d12f2f25a5224ba0be:/bk_paper.c diff --git a/bk_paper.c b/bk_paper.c index 0bf3ce1..a9f6f24 100644 --- a/bk_paper.c +++ b/bk_paper.c @@ -10,38 +10,71 @@ */ /* - * To be done: - * - * - Text wrapping is suspicious in both PS and PDF: the space - * adjust seems to be _approximately_ working, but not exactly. - * I bet some rounding error compensation is required. - * - * - set up contents section now we know what sections begin on - * which pages - * - * - do cross-reference rectangles - * - * - do PDF outline - * - * - all the missing features in text rendering (code paragraphs, - * list bullets, indentation, section heading styles) - * - * - index - * - * That should bring us to the same level of functionality that - * original-Halibut had, and the same in PDF plus the obvious - * interactive navigation features. After that, in future work: + * TODO in future work: * * - linearised PDF, perhaps? * + * - we should use PDFDocEncoding or Unicode for outline strings, + * now that I actually know how to do them. Probably easiest if + * I do this _after_ bringing in libcharset, since I can simply + * supply PDFDocEncoding in there. + * * - I'm uncertain of whether I need to include a ToUnicode CMap * in each of my font definitions in PDF. Currently things (by * which I mean cut and paste out of acroread) seem to be * working fairly happily without it, but I don't know. * + * - rather than the ugly aux_text mechanism for rendering chapter + * titles, we could actually build the correct word list and + * wrap it as a whole. 
+ * + * - get vertical font metrics and use them to position the PDF + * xref boxes more pleasantly + * * - configurability + * * all the measurements in `conf' should be configurable + * + notably paper size/shape + * * page header and footer should be configurable; we should + * be able to shift the page number elsewhere, and add other + * things such as the current chapter/section title and fixed + * text + * * remove the fixed mapping from heading levels to heading + * styles; offer a menu of styles from which the user can + * choose at every heading level + * * first-line indent in paragraphs + * * fixed text: `Contents', `Index', bullet, quotes, the + * colon-space and full stop in chapter title constructions + * * configurable location of contents? + * * certainly configurably _remove_ the contents, and possibly + * also the index + * * double-sided document switch? + * + means you have two header/footer formats which + * alternate + * + and means that mandatory page breaks before chapter + * titles should include a blank page if necessary to + * start the next section on a right-hand page * * - title pages + * + * - ability to import other Type 1 fonts + * * we need to parse the font to extract its metrics + * * then we pass the font bodily to both PS and PDF so it can + * be included in the output file + * + * - character substitution for better typography? + * * fi, fl, ffi, ffl ligatures + * * use real ellipsis rather than ... + * * a hyphen in a word by itself might prefer to be an en-dash + * * (Americans might even want a convenient way to use an + * em-dash) + * * DON'T DO ANY OF THE ABOVE WITHIN \c OR \cw! 
+ * * substituting `minus' for `hyphen' in the standard encoding + * is probably preferable in Courier, though certainly not in + * the main text font + * * if I do do this lot, I'm rather inclined to at least try + * to think up a configurable way to do it so that Americans + * can do em-dash tricks without my intervention and other + * people can do other odd things too. */ #include @@ -50,59 +83,280 @@ #include "halibut.h" #include "paper.h" +typedef struct paper_conf_Tag paper_conf; +typedef struct paper_idx_Tag paper_idx; + +struct paper_conf_Tag { + int paper_width; + int paper_height; + int left_margin; + int top_margin; + int right_margin; + int bottom_margin; + int indent_list_bullet; + int indent_list; + int indent_quote; + int base_leading; + int base_para_spacing; + int chapter_top_space; + int sect_num_left_space; + int chapter_underline_depth; + int chapter_underline_thickness; + int rule_thickness; + int base_font_size; + int contents_indent_step; + int contents_margin; + int leader_separation; + int index_gutter; + int index_cols; + int index_minsep; + int pagenum_fontsize; + int footer_distance; + /* These are derived from the above */ + int base_width; + int page_height; + int index_colwidth; + /* Fonts used in the configuration */ + font_data *tr, *ti, *hr, *hi, *cr, *co, *cb; +}; + +struct paper_idx_Tag { + /* + * Word list giving the page numbers on which this index entry + * appears. Also the last word in the list, for ease of + * construction. + */ + word *words; + word *lastword; + /* + * The last page added to the list (so we can ensure we don't + * add one twice). 
+ */ + page_data *lastpage; +}; + +enum { + word_PageXref = word_NotWordType + 1 +}; + static font_data *make_std_font(font_list *fontlist, char const *name); static void wrap_paragraph(para_data *pdata, word *words, int w, int i1, int i2); static page_data *page_breaks(line_data *first, line_data *last, - int page_height); -static void render_line(line_data *ldata, int left_x, int top_y); + int page_height, int ncols, int headspace); +static int render_string(page_data *page, font_data *font, int fontsize, + int x, int y, wchar_t *str); +static int render_line(line_data *ldata, int left_x, int top_y, + xref_dest *dest, keywordlist *keywords, indexdata *idx); +static void render_para(para_data *pdata, paper_conf *conf, + keywordlist *keywords, indexdata *idx, + paragraph *index_placeholder, page_data *index_page); +static int string_width(font_data *font, wchar_t const *string, int *errs); +static int paper_width_simple(para_data *pdata, word *text); +static para_data *code_paragraph(int indent, word *words, paper_conf *conf); +static para_data *rule_paragraph(int indent, paper_conf *conf); +static void add_rect_to_page(page_data *page, int x, int y, int w, int h); +static para_data *make_para_data(int ptype, int paux, int indent, int rmargin, + word *pkwtext, word *pkwtext2, word *pwords, + paper_conf *conf); +static void standard_line_spacing(para_data *pdata, paper_conf *conf); +static wchar_t *prepare_outline_title(word *first, wchar_t *separator, + word *second); +static word *fake_word(wchar_t *text); +static word *fake_space_word(void); +static word *fake_page_ref(page_data *page); +static word *fake_end_ref(void); +static word *prepare_contents_title(word *first, wchar_t *separator, + word *second); +static void fold_into_page(page_data *dest, page_data *src, int right_shift); void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, indexdata *idx) { paragraph *p; document *doc; - int indent, extra_indent, firstline_indent; - para_data *pdata; 
- line_data *ldata, *firstline, *lastline; - font_data *tr, *ti, *cr; + int indent, used_contents; + para_data *pdata, *firstpara = NULL, *lastpara = NULL; + para_data *firstcont, *lastcont; + line_data *firstline, *lastline, *firstcontline, *lastcontline; page_data *pages; font_list *fontlist; + paper_conf *conf; + int has_index; + int pagenum; + paragraph index_placeholder_para; + page_data *first_index_page; /* * FIXME: All these things ought to become configurable. */ - int paper_width = 595 * 4096; - int paper_height = 841 * 4096; - int left_margin = 72 * 4096; - int top_margin = 72 * 4096; - int right_margin = 72 * 4096; - int bottom_margin = 108 * 4096; - int indent_list_bullet = 6 * 4096; - int indent_list = 24 * 4096; - int indent_quote = 18 * 4096; - int base_leading = 4096; - int base_para_spacing = 10 * 4096; - - int base_width = paper_width - left_margin - right_margin; - int page_height = paper_height - top_margin - bottom_margin; - - IGNORE(keywords); /* FIXME */ - IGNORE(idx); /* FIXME */ - IGNORE(indent_list_bullet); /* FIXME */ + conf = mknew(paper_conf); + conf->paper_width = 595 * 4096; + conf->paper_height = 841 * 4096; + conf->left_margin = 72 * 4096; + conf->top_margin = 72 * 4096; + conf->right_margin = 72 * 4096; + conf->bottom_margin = 108 * 4096; + conf->indent_list_bullet = 6 * 4096; + conf->indent_list = 24 * 4096; + conf->indent_quote = 18 * 4096; + conf->base_leading = 4096; + conf->base_para_spacing = 10 * 4096; + conf->chapter_top_space = 72 * 4096; + conf->sect_num_left_space = 12 * 4096; + conf->chapter_underline_depth = 14 * 4096; + conf->chapter_underline_thickness = 3 * 4096; + conf->rule_thickness = 1 * 4096; + conf->base_font_size = 12; + conf->contents_indent_step = 24 * 4096; + conf->contents_margin = 84 * 4096; + conf->leader_separation = 12 * 4096; + conf->index_gutter = 36 * 4096; + conf->index_cols = 2; + conf->index_minsep = 18 * 4096; + conf->pagenum_fontsize = 12; + conf->footer_distance = 32 * 4096; + + 
conf->base_width = + conf->paper_width - conf->left_margin - conf->right_margin; + conf->page_height = + conf->paper_height - conf->top_margin - conf->bottom_margin; + conf->index_colwidth = + (conf->base_width - (conf->index_cols-1) * conf->index_gutter) + / conf->index_cols; /* * First, set up some font structures. */ fontlist = mknew(font_list); fontlist->head = fontlist->tail = NULL; - tr = make_std_font(fontlist, "Times-Roman"); - ti = make_std_font(fontlist, "Times-Italic"); - cr = make_std_font(fontlist, "Courier"); + conf->tr = make_std_font(fontlist, "Times-Roman"); + conf->ti = make_std_font(fontlist, "Times-Italic"); + conf->hr = make_std_font(fontlist, "Helvetica-Bold"); + conf->hi = make_std_font(fontlist, "Helvetica-BoldOblique"); + conf->cr = make_std_font(fontlist, "Courier"); + conf->co = make_std_font(fontlist, "Courier-Oblique"); + conf->cb = make_std_font(fontlist, "Courier-Bold"); + + /* + * Set up a data structure to collect page numbers for each + * index entry. + */ + { + int i; + indexentry *entry; + + has_index = FALSE; + + for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { + paper_idx *pi = mknew(paper_idx); + + has_index = TRUE; + + pi->words = pi->lastword = NULL; + pi->lastpage = NULL; + + entry->backend_data = pi; + } + } + + /* + * Format the contents entry for each heading. 
+ */ + { + word *contents_title; + contents_title = fake_word(L"Contents"); + + firstcont = make_para_data(para_UnnumberedChapter, 0, 0, 0, + NULL, NULL, contents_title, conf); + lastcont = firstcont; + lastcont->next = NULL; + firstcontline = firstcont->first; + lastcontline = lastcont->last; + for (p = sourceform; p; p = p->next) { + word *words; + int indent; + + switch (p->type) { + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + case para_Heading: + case para_Subsect: + switch (p->type) { + case para_Chapter: + case para_Appendix: + words = prepare_contents_title(p->kwtext, L": ", p->words); + indent = 0; + break; + case para_UnnumberedChapter: + words = prepare_contents_title(NULL, NULL, p->words); + indent = 0; + break; + case para_Heading: + case para_Subsect: + words = prepare_contents_title(p->kwtext2, L" ", p->words); + indent = (p->aux + 1) * conf->contents_indent_step; + break; + } + pdata = make_para_data(para_Normal, p->aux, indent, + conf->contents_margin, + NULL, NULL, words, conf); + pdata->next = NULL; + pdata->contents_entry = p; + lastcont->next = pdata; + lastcont = pdata; + + /* + * Link all contents line structures together into + * a big list. + */ + if (pdata->first) { + if (lastcontline) { + lastcontline->next = pdata->first; + pdata->first->prev = lastcontline; + } else { + firstcontline = pdata->first; + pdata->first->prev = NULL; + } + lastcontline = pdata->last; + lastcontline->next = NULL; + } + + break; + } + } + + /* + * And one extra one, for the index. 
+ */ + if (has_index) { + pdata = make_para_data(para_Normal, 0, 0, + conf->contents_margin, + NULL, NULL, fake_word(L"Index"), conf); + pdata->next = NULL; + pdata->contents_entry = &index_placeholder_para; + lastcont->next = pdata; + lastcont = pdata; + + if (pdata->first) { + if (lastcontline) { + lastcontline->next = pdata->first; + pdata->first->prev = lastcontline; + } else { + firstcontline = pdata->first; + pdata->first->prev = NULL; + } + lastcontline = pdata->last; + lastcontline->next = NULL; + } + } + } /* - * Go through and break up each paragraph into lines. + * Do the main paragraph formatting. */ indent = 0; + used_contents = FALSE; firstline = lastline = NULL; for (p = sourceform; p; p = p->next) { p->private_data = NULL; @@ -115,7 +369,6 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, */ case para_IM: case para_BR: - case para_Rule: case para_Biblio: case para_NotParaType: case para_Config: @@ -129,20 +382,33 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, * rest of the paragraphs, so we need to pay attention. */ case para_LcontPush: - indent += indent_list; break; + indent += conf->indent_list; break; case para_LcontPop: - indent -= indent_list; assert(indent >= 0); break; + indent -= conf->indent_list; assert(indent >= 0); break; case para_QuotePush: - indent += indent_quote; break; + indent += conf->indent_quote; break; case para_QuotePop: - indent -= indent_quote; assert(indent >= 0); break; + indent -= conf->indent_quote; assert(indent >= 0); break; /* * This paragraph type is special. Process it * specially. */ case para_Code: - /* FIXME */ + pdata = code_paragraph(indent, p->words, conf); + p->private_data = pdata; + if (pdata->first != pdata->last) { + pdata->first->penalty_after += 100000; + pdata->last->penalty_before += 100000; + } + break; + + /* + * This paragraph is also special. 
+ */ + case para_Rule: + pdata = rule_paragraph(indent, conf); + p->private_data = pdata; break; /* @@ -164,75 +430,45 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, case para_Description: case para_Copyright: case para_Title: - pdata = mknew(para_data); - - /* - * FIXME: Subsidiary switch on paragraph type to decide - * what font set to use for this paragraph. - */ - pdata->fonts[FONT_NORMAL] = tr; - pdata->sizes[FONT_NORMAL] = 12; - pdata->fonts[FONT_EMPH] = ti; - pdata->sizes[FONT_EMPH] = 12; - pdata->fonts[FONT_CODE] = cr; - pdata->sizes[FONT_CODE] = 12; - - /* - * FIXME: Also select an indentation level depending on - * the paragraph type (list paragraphs other than - * para_DescribedThing need extra indent). - * - * Perhaps at some point we might even arrange for the - * user to be able to request indented first lines in - * paragraphs. - */ - extra_indent = 0; - firstline_indent = 0; + pdata = make_para_data(p->type, p->aux, indent, 0, + p->kwtext, p->kwtext2, p->words, conf); - wrap_paragraph(pdata, p->words, base_width, - indent + firstline_indent, - indent + extra_indent); + p->private_data = pdata; - /* - * FIXME: Also find the auxiliary data for this - * paragraph. For para_Bullet it's a bullet; for - * para_NumberedList it's the number; for some section - * headings (depending on the style of section heading - * selected) it's the section number. - * - * Assign into pdata->first->aux_*. - */ + break; + } - p->private_data = pdata; + if (p->private_data) { + pdata = (para_data *)p->private_data; /* - * Set the line spacing for each line in this paragraph. + * If this is the first non-title heading, we link the + * contents section in before it. 
*/ - for (ldata = pdata->first; ldata; ldata = ldata->next) { - if (ldata == pdata->first) - ldata->space_before = base_para_spacing / 2; + if (!used_contents && pdata->outline_level > 0) { + used_contents = TRUE; + if (lastpara) + lastpara->next = firstcont; else - ldata->space_before = base_leading / 2; - if (ldata == pdata->last) - ldata->space_after = base_para_spacing / 2; - else - ldata->space_after = base_leading / 2; - ldata->page_break = FALSE; + firstpara = firstcont; + lastpara = lastcont; + assert(lastpara->next == NULL); + + if (lastline) { + lastline->next = firstcontline; + firstcontline->prev = lastline; + } else { + firstline = firstcontline; + firstcontline->prev = NULL; + } + assert(lastcontline != NULL); + lastline = lastcontline; + lastline->next = NULL; } /* - * FIXME: some kinds of section heading do require a - * page break before them. + * Link all line structures together into a big list. */ - - break; - } - - /* - * Link all line structures together into a big list. - */ - if (p->private_data) { - pdata = (para_data *)p->private_data; if (pdata->first) { if (lastline) { lastline->next = pdata->first; @@ -242,7 +478,18 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, pdata->first->prev = NULL; } lastline = pdata->last; + lastline->next = NULL; } + + /* + * Link all paragraph structures together similarly. + */ + pdata->next = NULL; + if (lastpara) + lastpara->next = pdata; + else + firstpara = pdata; + lastpara = pdata; } } @@ -250,33 +497,566 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, * Now we have an enormous linked list of every line of text in * the document. Break it up into pages. */ - pages = page_breaks(firstline, lastline, page_height); + pages = page_breaks(firstline, lastline, conf->page_height, 0, 0); + + /* + * Number the pages. 
+ */ + { + char buf[40]; + page_data *page; + + pagenum = 0; + + for (page = pages; page; page = page->next) { + sprintf(buf, "%d", ++pagenum); + page->number = ufroma_dup(buf, CS_ASCII); + } + + if (has_index) { + first_index_page = mknew(page_data); + first_index_page->next = first_index_page->prev = NULL; + first_index_page->first_line = NULL; + first_index_page->last_line = NULL; + first_index_page->first_text = first_index_page->last_text = NULL; + first_index_page->first_xref = first_index_page->last_xref = NULL; + first_index_page->first_rect = first_index_page->last_rect = NULL; + + /* And don't forget the as-yet-uncreated index. */ + sprintf(buf, "%d", ++pagenum); + first_index_page->number = ufroma_dup(buf, CS_ASCII); + } + } /* * Now we're ready to actually lay out the pages. We do this by * looping over _paragraphs_, since we may need to track cross- * references between lines and even across pages. */ - for (p = sourceform; p; p = p->next) { - pdata = (para_data *)p->private_data; + for (pdata = firstpara; pdata; pdata = pdata->next) + render_para(pdata, conf, keywords, idx, + &index_placeholder_para, first_index_page); - if (pdata) { - for (ldata = pdata->first; ldata; ldata = ldata->next) { - render_line(ldata, left_margin, paper_height - top_margin); - if (ldata == pdata->last) - break; + /* + * Now we've laid out the main body pages, we should have + * acquired a full set of page numbers for the index. + */ + if (has_index) { + int i; + indexentry *entry; + word *index_title; + para_data *firstidx, *lastidx; + line_data *firstidxline, *lastidxline, *ldata; + page_data *ipages, *ipages2, *page; + + /* + * Create a set of paragraphs for the index. 
+ */ + index_title = fake_word(L"Index"); + + firstidx = make_para_data(para_UnnumberedChapter, 0, 0, 0, + NULL, NULL, index_title, conf); + lastidx = firstidx; + lastidx->next = NULL; + firstidxline = firstidx->first; + lastidxline = lastidx->last; + for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { + paper_idx *pi = (paper_idx *)entry->backend_data; + para_data *text, *pages; + + if (!pi->words) + continue; + + text = make_para_data(para_Normal, 0, 0, + conf->base_width - conf->index_colwidth, + NULL, NULL, entry->text, conf); + + pages = make_para_data(para_Normal, 0, 0, + conf->base_width - conf->index_colwidth, + NULL, NULL, pi->words, conf); + + text->justification = LEFT; + pages->justification = RIGHT; + text->last->space_after = pages->first->space_before = + conf->base_leading / 2; + + pages->last->space_after = text->first->space_before = + conf->base_leading; + + assert(text->first); + assert(pages->first); + assert(lastidxline); + assert(lastidx); + + /* + * If feasible, fold the two halves of the index entry + * together. + */ + if (text->last->real_shortfall + pages->first->real_shortfall > + conf->index_colwidth + conf->index_minsep) { + text->last->space_after = -1; + pages->first->space_before = -pages->first->line_height+1; + } + + lastidx->next = text; + text->next = pages; + pages->next = NULL; + lastidx = pages; + + /* + * Link all index line structures together into + * a big list. + */ + text->last->next = pages->first; + pages->first->prev = text->last; + + lastidxline->next = text->first; + text->first->prev = lastidxline; + + lastidxline = pages->last; + + /* + * Breaking an index entry anywhere is so bad that I + * think I'm going to forbid it totally. + */ + for (ldata = text->first; ldata && ldata->next; + ldata = ldata->next) { + ldata->next->space_before += ldata->space_after + 1; + ldata->space_after = -1; } } + + /* + * Now break the index into pages. 
+ */ + ipages = page_breaks(firstidxline, firstidxline, conf->page_height, + 0, 0); + ipages2 = page_breaks(firstidxline->next, lastidxline, + conf->page_height, + conf->index_cols, + firstidxline->space_before + + firstidxline->line_height + + firstidxline->space_after); + + /* + * This will have put each _column_ of the index on a + * separate page, which isn't what we want. Fold the pages + * back together. + */ + page = ipages2; + while (page) { + int i; + + for (i = 1; i < conf->index_cols; i++) + if (page->next) { + page_data *tpage; + + fold_into_page(page, page->next, + i * (conf->index_colwidth + + conf->index_gutter)); + tpage = page->next; + page->next = page->next->next; + if (page->next) + page->next->prev = page; + sfree(tpage); + } + + page = page->next; + } + /* Also fold the heading on to the same page as the index items. */ + fold_into_page(ipages, ipages2, 0); + ipages->next = ipages2->next; + if (ipages->next) + ipages->next->prev = ipages; + sfree(ipages2); + fold_into_page(first_index_page, ipages, 0); + first_index_page->next = ipages->next; + if (first_index_page->next) + first_index_page->next->prev = first_index_page; + sfree(ipages); + ipages = first_index_page; + + /* + * Number the index pages, except the already-numbered + * first one. + */ + for (page = ipages->next; page; page = page->next) { + char buf[40]; + sprintf(buf, "%d", ++pagenum); + page->number = ufroma_dup(buf, CS_ASCII); + } + + /* + * Render the index pages. + */ + for (pdata = firstidx; pdata; pdata = pdata->next) + render_para(pdata, conf, keywords, idx, + &index_placeholder_para, first_index_page); + + /* + * Link the index page list on to the end of the main page + * list. + */ + if (!pages) + pages = ipages; + else { + for (page = pages; page->next; page = page->next); + page->next = ipages; + } + + /* + * Same with the paragraph list, which will cause the index + * to be mentioned in the document outline. 
+ */ + if (!firstpara) + firstpara = firstidx; + else + lastpara->next = firstidx; + lastpara = lastidx; + } + + /* + * Draw the headers and footers. + * + * FIXME: this should be fully configurable, but for the moment + * I'm just going to put in page numbers in the centre of a + * footer and leave it at that. + */ + { + page_data *page; + + for (page = pages; page; page = page->next) { + int width; + + width = conf->pagenum_fontsize * + string_width(conf->tr, page->number, NULL); + + render_string(page, conf->tr, conf->pagenum_fontsize, + conf->left_margin + (conf->base_width - width)/2, + conf->bottom_margin - conf->footer_distance, + page->number); + } } + /* + * Start putting together the overall document structure we're + * going to return. + */ doc = mknew(document); doc->fonts = fontlist; doc->pages = pages; - doc->paper_width = paper_width; - doc->paper_height = paper_height; + doc->paper_width = conf->paper_width; + doc->paper_height = conf->paper_height; + + /* + * Collect the section heading paragraphs into a document + * outline. This is slightly fiddly because the Title paragraph + * isn't required to be at the start, although all the others + * must be in order. + */ + { + int osize = 20; + + doc->outline_elements = mknewa(outline_element, osize); + doc->n_outline_elements = 0; + + /* First find the title. */ + for (pdata = firstpara; pdata; pdata = pdata->next) { + if (pdata->outline_level == 0) { + doc->outline_elements[0].level = 0; + doc->outline_elements[0].pdata = pdata; + doc->n_outline_elements++; + break; + } + } + + /* Then collect the rest. 
*/ + for (pdata = firstpara; pdata; pdata = pdata->next) { + if (pdata->outline_level > 0) { + if (doc->n_outline_elements >= osize) { + osize += 20; + doc->outline_elements = + resize(doc->outline_elements, osize); + } + + doc->outline_elements[doc->n_outline_elements].level = + pdata->outline_level; + doc->outline_elements[doc->n_outline_elements].pdata = pdata; + doc->n_outline_elements++; + } + } + } + + sfree(conf); + return doc; } +static para_data *make_para_data(int ptype, int paux, int indent, int rmargin, + word *pkwtext, word *pkwtext2, word *pwords, + paper_conf *conf) +{ + para_data *pdata; + line_data *ldata; + int extra_indent, firstline_indent, aux_indent; + word *aux, *aux2; + + pdata = mknew(para_data); + pdata->outline_level = -1; + pdata->outline_title = NULL; + pdata->rect_type = RECT_NONE; + pdata->contents_entry = NULL; + pdata->justification = JUST; + + /* + * Choose fonts for this paragraph. + * + * FIXME: All of this ought to be completely + * user-configurable. + */ + switch (ptype) { + case para_Title: + pdata->fonts[FONT_NORMAL] = conf->hr; + pdata->sizes[FONT_NORMAL] = 24; + pdata->fonts[FONT_EMPH] = conf->hi; + pdata->sizes[FONT_EMPH] = 24; + pdata->fonts[FONT_CODE] = conf->cb; + pdata->sizes[FONT_CODE] = 24; + pdata->outline_level = 0; + break; + + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + pdata->fonts[FONT_NORMAL] = conf->hr; + pdata->sizes[FONT_NORMAL] = 20; + pdata->fonts[FONT_EMPH] = conf->hi; + pdata->sizes[FONT_EMPH] = 20; + pdata->fonts[FONT_CODE] = conf->cb; + pdata->sizes[FONT_CODE] = 20; + pdata->outline_level = 1; + break; + + case para_Heading: + case para_Subsect: + pdata->fonts[FONT_NORMAL] = conf->hr; + pdata->fonts[FONT_EMPH] = conf->hi; + pdata->fonts[FONT_CODE] = conf->cb; + pdata->sizes[FONT_NORMAL] = + pdata->sizes[FONT_EMPH] = + pdata->sizes[FONT_CODE] = + (paux == 0 ? 16 : paux == 1 ? 
14 : 13); + pdata->outline_level = 2 + paux; + break; + + case para_Normal: + case para_BiblioCited: + case para_Bullet: + case para_NumberedList: + case para_DescribedThing: + case para_Description: + case para_Copyright: + pdata->fonts[FONT_NORMAL] = conf->tr; + pdata->sizes[FONT_NORMAL] = 12; + pdata->fonts[FONT_EMPH] = conf->ti; + pdata->sizes[FONT_EMPH] = 12; + pdata->fonts[FONT_CODE] = conf->cr; + pdata->sizes[FONT_CODE] = 12; + break; + } + + /* + * Also select an indentation level depending on the + * paragraph type (list paragraphs other than + * para_DescribedThing need extra indent). + * + * (FIXME: Perhaps at some point we might even arrange + * for the user to be able to request indented first + * lines in paragraphs.) + */ + if (ptype == para_Bullet || + ptype == para_NumberedList || + ptype == para_Description) { + extra_indent = firstline_indent = conf->indent_list; + } else { + extra_indent = firstline_indent = 0; + } + + /* + * Find the auxiliary text for this paragraph. + */ + aux = aux2 = NULL; + aux_indent = 0; + + switch (ptype) { + case para_Chapter: + case para_Appendix: + case para_Heading: + case para_Subsect: + /* + * For some heading styles (FIXME: be able to + * configure which), the auxiliary text contains + * the chapter number and is arranged to be + * right-aligned a few points left of the primary + * margin. For other styles, the auxiliary text is + * the full chapter _name_ and takes up space + * within the (wrapped) chapter title, meaning that + * we must move the first line indent over to make + * space for it. 
+ */ + if (ptype == para_Heading || ptype == para_Subsect) { + int len; + + aux = pkwtext2; + len = paper_width_simple(pdata, pkwtext2); + aux_indent = -len - conf->sect_num_left_space; + + pdata->outline_title = + prepare_outline_title(pkwtext2, L" ", pwords); + } else { + aux = pkwtext; + aux2 = fake_word(L": "); + aux_indent = 0; + + firstline_indent += paper_width_simple(pdata, aux); + firstline_indent += paper_width_simple(pdata, aux2); + + pdata->outline_title = + prepare_outline_title(pkwtext, L": ", pwords); + } + break; + + case para_Bullet: + /* + * Auxiliary text consisting of a bullet. (FIXME: + * configurable bullet.) + */ + aux = fake_word(L"\x2022"); + aux_indent = indent + conf->indent_list_bullet; + break; + + case para_NumberedList: + /* + * Auxiliary text consisting of the number followed + * by a (FIXME: configurable) full stop. + */ + aux = pkwtext; + aux2 = fake_word(L"."); + aux_indent = indent + conf->indent_list_bullet; + break; + + case para_BiblioCited: + /* + * Auxiliary text consisting of the bibliography + * reference text, and a trailing space. + */ + aux = pkwtext; + aux2 = fake_word(L" "); + aux_indent = indent; + firstline_indent += paper_width_simple(pdata, aux); + firstline_indent += paper_width_simple(pdata, aux2); + break; + } + + if (pdata->outline_level >= 0 && !pdata->outline_title) { + pdata->outline_title = + prepare_outline_title(NULL, NULL, pwords); + } + + wrap_paragraph(pdata, pwords, conf->base_width - rmargin, + indent + firstline_indent, + indent + extra_indent); + + pdata->first->aux_text = aux; + pdata->first->aux_text_2 = aux2; + pdata->first->aux_left_indent = aux_indent; + + /* + * Line breaking penalties. + */ + switch (ptype) { + case para_Chapter: + case para_Appendix: + case para_Heading: + case para_Subsect: + case para_UnnumberedChapter: + /* + * Fixed and large penalty for breaking straight + * after a heading; corresponding bonus for + * breaking straight before. 
+ */ + pdata->first->penalty_before = -500000; + pdata->last->penalty_after = 500000; + for (ldata = pdata->first; ldata; ldata = ldata->next) + ldata->penalty_after = 500000; + break; + + case para_DescribedThing: + /* + * This is treated a bit like a small heading: + * there's a penalty for breaking after it (i.e. + * between it and its description), and a bonus for + * breaking before it (actually _between_ list + * items). + */ + pdata->first->penalty_before = -200000; + pdata->last->penalty_after = 200000; + break; + + default: + /* + * Most paragraph types: widow/orphan control by + * discouraging breaking one line from the end of + * any paragraph. + */ + if (pdata->first != pdata->last) { + pdata->first->penalty_after = 100000; + pdata->last->penalty_before = 100000; + } + break; + } + + standard_line_spacing(pdata, conf); + + /* + * Some kinds of section heading require a page break before + * them and an underline after. + */ + if (ptype == para_Title || + ptype == para_Chapter || + ptype == para_Appendix || + ptype == para_UnnumberedChapter) { + pdata->first->page_break = TRUE; + pdata->first->space_before = conf->chapter_top_space; + pdata->last->space_after += + (conf->chapter_underline_depth + + conf->chapter_underline_thickness); + pdata->rect_type = RECT_CHAPTER_UNDERLINE; + } + + return pdata; +} + +static void standard_line_spacing(para_data *pdata, paper_conf *conf) +{ + line_data *ldata; + + /* + * Set the line spacing for each line in this paragraph. 
+ */ + for (ldata = pdata->first; ldata; ldata = ldata->next) { + if (ldata == pdata->first) + ldata->space_before = conf->base_para_spacing / 2; + else + ldata->space_before = conf->base_leading / 2; + if (ldata == pdata->last) + ldata->space_after = conf->base_para_spacing / 2; + else + ldata->space_after = conf->base_leading / 2; + ldata->page_break = FALSE; + } +} + static font_encoding *new_font_encoding(font_data *font) { font_encoding *fe; @@ -385,25 +1165,23 @@ static int string_width(font_data *font, wchar_t const *string, int *errs) return width; } -static int paper_width(void *vctx, word *word); +static int paper_width_internal(void *vctx, word *word, int *nspaces); struct paper_width_ctx { int minspacewidth; para_data *pdata; }; -static int paper_width_list(void *vctx, word *text, word *end) { +static int paper_width_list(void *vctx, word *text, word *end, int *nspaces) { int w = 0; - while (text) { - w += paper_width(vctx, text); - if (text == end) - break; + while (text && text != end) { + w += paper_width_internal(vctx, text, nspaces); text = text->next; } return w; } -static int paper_width(void *vctx, word *word) +static int paper_width_internal(void *vctx, word *word, int *nspaces) { struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx; int style, type, findex, width, errs; @@ -414,6 +1192,7 @@ static int paper_width(void *vctx, word *word) case word_HyperEnd: case word_UpperXref: case word_LowerXref: + case word_PageXref: case word_XrefEnd: case word_IndexRef: return 0; @@ -429,9 +1208,11 @@ static int paper_width(void *vctx, word *word) if (type == word_Normal) { str = word->text; } else if (type == word_WhiteSpace) { - if (findex != FONT_CODE) + if (findex != FONT_CODE) { + if (nspaces) + (*nspaces)++; return ctx->minspacewidth; - else + } else str = L" "; } else /* if (type == word_Quote) */ { if (word->aux == quote_Open) @@ -443,11 +1224,28 @@ static int paper_width(void *vctx, word *word) width = 
string_width(ctx->pdata->fonts[findex], str, &errs); if (errs && word->alt) - return paper_width_list(vctx, word->alt, NULL); + return paper_width_list(vctx, word->alt, NULL, nspaces); else return ctx->pdata->sizes[findex] * width; } +static int paper_width(void *vctx, word *word) +{ + return paper_width_internal(vctx, word, NULL); +} + +static int paper_width_simple(para_data *pdata, word *text) +{ + struct paper_width_ctx ctx; + + ctx.pdata = pdata; + ctx.minspacewidth = + (pdata->sizes[FONT_NORMAL] * + string_width(pdata->fonts[FONT_NORMAL], L" ", NULL)); + + return paper_width_list(&ctx, text, NULL, NULL); +} + static void wrap_paragraph(para_data *pdata, word *words, int w, int i1, int i2) { @@ -504,7 +1302,7 @@ static void wrap_paragraph(para_data *pdata, word *words, ldata->pdata = pdata; ldata->first = p->begin; - ldata->last = p->end; + ldata->end = p->end; ldata->line_height = line_height; ldata->xpos = (p == wrapping ? i1 : i2); @@ -519,69 +1317,43 @@ static void wrap_paragraph(para_data *pdata, word *words, ldata->next = NULL; pdata->last = ldata; - len = paper_width_list(&ctx, ldata->first, ldata->last); - wid = (p == wrapping ? w - i1 : w - i2); spaces = 0; + len = paper_width_list(&ctx, ldata->first, ldata->end, &spaces); + wid = (p == wrapping ? w - i1 : w - i2); wd = ldata->first; - while (wd) { -#if 0 - switch (wd->type) { - case word_HyperLink: - case word_HyperEnd: - case word_UpperXref: - case word_LowerXref: - case word_XrefEnd: - case word_IndexRef: - break; - default: - if (removeattr(wd->type) == word_Normal) - printf("%ls", wd->text); - else if (removeattr(wd->type) == word_WhiteSpace) - printf(" "); - else if (removeattr(wd->type) == word_Quote) - printf(wd->aux == quote_Open ? 
"`" : "'"); - break; - } -#endif - if (removeattr(wd->type) == word_WhiteSpace) - spaces++; - if (wd == ldata->last) - break; - wd = wd->next; - } - - if (spaces) { - ldata->space_adjust = (wid - len) / spaces; - /* - * This tells us how much the space width needs to - * change from _min_spacewidth. But we want to store - * its difference from the _natural_ space width, to - * make the text rendering easier. - */ - ldata->space_adjust += ctx.minspacewidth; - ldata->space_adjust -= spacewidth; - /* - * Special case: on the last line of a paragraph, we - * never stretch spaces. - */ - if (ldata->space_adjust > 0 && !p->next) - ldata->space_adjust = 0; - } else { - ldata->space_adjust = 0; - } + ldata->hshortfall = wid - len; + ldata->nspaces = spaces; + /* + * This tells us how much the space width needs to + * change from _min_spacewidth. But we want to store + * its difference from the _natural_ space width, to + * make the text rendering easier. + */ + ldata->hshortfall += ctx.minspacewidth * spaces; + ldata->hshortfall -= spacewidth * spaces; + ldata->real_shortfall = ldata->hshortfall; + /* + * Special case: on the last line of a paragraph, we + * never stretch spaces. + */ + if (ldata->hshortfall > 0 && !p->next) + ldata->hshortfall = 0; ldata->aux_text = NULL; + ldata->aux_text_2 = NULL; ldata->aux_left_indent = 0; + ldata->penalty_before = ldata->penalty_after = 0; } } static page_data *page_breaks(line_data *first, line_data *last, - int page_height) + int page_height, int ncols, int headspace) { line_data *l, *m; page_data *ph, *pt; + int n, n1, this_height; /* * Page breaking is done by a close analogue of the optimal @@ -593,73 +1365,111 @@ static page_data *page_breaks(line_data *first, line_data *last, * function for optimally page-breaking everything after that * page, and pick the best option. 
* + * This is made slightly more complex by the fact that we have + * a multi-column index with a heading at the top of the + * _first_ page, meaning that the first _ncols_ pages must have + * a different length. Hence, we must do the wrapping ncols+1 + * times over, hypothetically trying to put every subsequence + * on every possible page. + * * Since my line_data structures are only used for this * purpose, I might as well just store the algorithm data * directly in them. */ for (l = last; l; l = l->prev) { - int minheight, text = 0, space = 0; - int cost; - - l->bestcost = -1; - for (m = l; m; m = m->next) { - if (m != l && m->page_break) - break; /* we've gone as far as we can */ - - if (m != l) - space += m->prev->space_after; - if (m != l || m->page_break) - space += m->space_before; - text += m->line_height; - minheight = text + space; - - if (m != l && minheight > page_height) - break; - - /* - * Compute the cost of this arrangement, as the square - * of the amount of wasted space on the page. - * Exception: if this is the last page before a - * mandatory break or the document end, we don't - * penalise a large blank area. - */ - if (m->next && !m->next->page_break) - { - int x = page_height - minheight; - int xf; - - xf = x & 0xFF; - x >>= 8; - - cost = x*x; - cost += (x * xf) >> 8; - } else - cost = 0; - - /* - * FIXME: here I should introduce penalties for - * breaking in mid-paragraph, particularly very close - * to one end of a paragraph and particularly in code - * paragraphs. - */ + l->bestcost = mknewa(int, ncols+1); + l->vshortfall = mknewa(int, ncols+1); + l->text = mknewa(int, ncols+1); + l->space = mknewa(int, ncols+1); + l->page_last = mknewa(line_data *, ncols+1); + + for (n = 0; n <= ncols; n++) { + int minheight, text = 0, space = 0; + int cost; + + n1 = (n < ncols ? 
n+1 : ncols); + if (n < ncols) + this_height = page_height - headspace; + else + this_height = page_height; + + l->bestcost[n] = -1; + for (m = l; m; m = m->next) { + if (m != l && m->page_break) + break; /* we've gone as far as we can */ + + if (m != l) { + if (m->prev->space_after > 0) + space += m->prev->space_after; + else + text += m->prev->space_after; + } + if (m != l || m->page_break) { + if (m->space_before > 0) + space += m->space_before; + else + text += m->space_before; + } + text += m->line_height; + minheight = text + space; - if (m->next && !m->next->page_break) - cost += m->next->bestcost; + if (m != l && minheight > this_height) + break; - if (l->bestcost == -1 || l->bestcost > cost) { /* - * This is the best option yet for this starting - * point. + * If the space after this paragraph is _negative_ + * (which means the next line is folded on to this + * one, which happens in the index), we absolutely + * cannot break here. */ - l->bestcost = cost; - if (m->next && !m->next->page_break) - l->shortfall = page_height - minheight; - else - l->shortfall = 0; - l->text = text; - l->space = space; - l->page_last = m; + if (m->space_after >= 0) { + + /* + * Compute the cost of this arrangement, as the + * square of the amount of wasted space on the + * page. Exception: if this is the last page + * before a mandatory break or the document + * end, we don't penalise a large blank area. + */ + if (m != last && m->next && !m->next->page_break) + { + int x = this_height - minheight; + int xf; + + xf = x & 0xFF; + x >>= 8; + + cost = x*x; + cost += (x * xf) >> 8; + } else + cost = 0; + + if (m != last && m->next && !m->next->page_break) { + cost += m->penalty_after; + cost += m->next->penalty_before; + } + + if (m != last && m->next && !m->next->page_break) + cost += m->next->bestcost[n1]; + if (l->bestcost[n] == -1 || l->bestcost[n] > cost) { + /* + * This is the best option yet for this + * starting point. 
+ */ + l->bestcost[n] = cost; + if (m != last && m->next && !m->next->page_break) + l->vshortfall[n] = this_height - minheight; + else + l->vshortfall[n] = 0; + l->text[n] = text; + l->space[n] = space; + l->page_last[n] = m; + } + } + + if (m == last) + break; } } } @@ -671,9 +1481,10 @@ static page_data *page_breaks(line_data *first, line_data *last, ph = pt = NULL; l = first; + n = 0; while (l) { page_data *page; - int text, space; + int text, space, head; page = mknew(page_data); page->next = NULL; @@ -685,38 +1496,72 @@ static page_data *page_breaks(line_data *first, line_data *last, pt = page; page->first_line = l; - page->last_line = l->page_last; + page->last_line = l->page_last[n]; page->first_text = page->last_text = NULL; + page->first_xref = page->last_xref = NULL; + page->first_rect = page->last_rect = NULL; /* * Now assign a y-coordinate to each line on the page. */ text = space = 0; + head = (n < ncols ? headspace : 0); for (l = page->first_line; l; l = l->next) { - if (l != page->first_line) - space += l->prev->space_after; - if (l != page->first_line || l->page_break) - space += l->space_before; + if (l != page->first_line) { + if (l->prev->space_after > 0) + space += l->prev->space_after; + else + text += l->prev->space_after; + } + if (l != page->first_line || l->page_break) { + if (l->space_before > 0) + space += l->space_before; + else + text += l->space_before; + } text += l->line_height; l->page = page; - l->ypos = text + space + - space * (float)page->first_line->shortfall / - page->first_line->space; + l->ypos = text + space + head + + space * (float)page->first_line->vshortfall[n] / + page->first_line->space[n]; if (l == page->last_line) break; } - l = page->last_line->next; + l = page->last_line; + if (l == last) + break; + l = l->next; + + n = (n < ncols ? 
n+1 : ncols); } return ph; } +static void add_rect_to_page(page_data *page, int x, int y, int w, int h) +{ + rect *r = mknew(rect); + + r->next = NULL; + if (page->last_rect) + page->last_rect->next = r; + else + page->first_rect = r; + page->last_rect = r; + + r->x = x; + r->y = y; + r->w = w; + r->h = h; +} + static void add_string_to_page(page_data *page, int x, int y, - font_encoding *fe, int size, char *text) + font_encoding *fe, int size, char *text, + int width) { text_fragment *frag; @@ -734,6 +1579,7 @@ static void add_string_to_page(page_data *page, int x, int y, frag->fe = fe; frag->fontsize = size; frag->text = dupstr(text); + frag->width = width; } /* @@ -752,8 +1598,10 @@ static int render_string(page_data *page, font_data *font, int fontsize, while (*str) { glyph = font->bmp[*str]; - if (glyph == 0xFFFF) + if (glyph == 0xFFFF) { + str++; continue; /* nothing more we can do here */ + } /* * Find which subfont this character is going in. @@ -785,7 +1633,8 @@ static int render_string(page_data *page, font_data *font, int fontsize, if (!subfont || sf != subfont) { if (subfont) { text[textpos] = '\0'; - add_string_to_page(page, x, y, subfont, fontsize, text); + add_string_to_page(page, x, y, subfont, fontsize, text, + textwid); x += textwid; } else { assert(textpos == 0); @@ -802,7 +1651,7 @@ static int render_string(page_data *page, font_data *font, int fontsize, if (textpos > 0) { text[textpos] = '\0'; - add_string_to_page(page, x, y, subfont, fontsize, text); + add_string_to_page(page, x, y, subfont, fontsize, text, textwid); x += textwid; } @@ -812,27 +1661,132 @@ static int render_string(page_data *page, font_data *font, int fontsize, /* * Returns the updated x coordinate. 
*/ -static int render_text(page_data *page, para_data *pdata, int x, int y, - word *text, word *text_end, int space_adjust) +static int render_text(page_data *page, para_data *pdata, line_data *ldata, + int x, int y, word *text, word *text_end, xref **xr, + int shortfall, int nspaces, int *nspace, + keywordlist *keywords, indexdata *idx) { - while (text) { + while (text && text != text_end) { int style, type, findex, errs; wchar_t *str; + xref_dest dest; switch (text->type) { + /* + * Start a cross-reference. + */ case word_HyperLink: - case word_HyperEnd: case word_UpperXref: case word_LowerXref: + case word_PageXref: + + if (text->type == word_HyperLink) { + dest.type = URL; + dest.url = utoa_dup(text->text, CS_ASCII); + dest.page = NULL; + } else if (text->type == word_PageXref) { + dest.type = PAGE; + dest.url = NULL; + dest.page = (page_data *)text->private_data; + } else { + keyword *kwl = kw_lookup(keywords, text->text); + para_data *pdata; + + if (kwl) { + assert(kwl->para->private_data); + pdata = (para_data *) kwl->para->private_data; + dest.type = PAGE; + dest.page = pdata->first->page; + dest.url = NULL; + } else { + /* + * Shouldn't happen, but *shrug* + */ + dest.type = NONE; + dest.page = NULL; + dest.url = NULL; + } + } + if (dest.type != NONE) { + *xr = mknew(xref); + (*xr)->dest = dest; /* structure copy */ + if (page->last_xref) + page->last_xref->next = *xr; + else + page->first_xref = *xr; + page->last_xref = *xr; + (*xr)->next = NULL; + + /* + * FIXME: Ideally we should have, and use, some + * vertical font metric information here so that + * our cross-ref rectangle can take account of + * descenders and the font's cap height. This will + * do for the moment, but it isn't ideal. + */ + (*xr)->lx = (*xr)->rx = x; + (*xr)->by = y; + (*xr)->ty = y + ldata->line_height; + } + goto nextword; + + /* + * Finish extending a cross-reference box. 
+ */ + case word_HyperEnd: case word_XrefEnd: - case word_IndexRef: + *xr = NULL; goto nextword; + /* - * FIXME: we should do something with all of these! - * Hyperlinks and xrefs have meaning in PDF, and this - * is probably the right place to nail down the index - * references too. + * Add the current page number to the list of pages + * referenced by an index entry. */ + case word_IndexRef: + /* + * We don't create index references in contents entries. + */ + if (!pdata->contents_entry) { + indextag *tag; + int i; + + tag = index_findtag(idx, text->text); + if (!tag) + goto nextword; + + for (i = 0; i < tag->nrefs; i++) { + indexentry *entry = tag->refs[i]; + paper_idx *pi = (paper_idx *)entry->backend_data; + + /* + * If the same index term is indexed twice + * within the same section, we only want to + * mention it once in the index. + */ + if (pi->lastpage != page) { + word **wp; + + if (pi->lastword) { + pi->lastword = pi->lastword->next = + fake_word(L","); + pi->lastword = pi->lastword->next = + fake_space_word(); + wp = &pi->lastword->next; + } else + wp = &pi->words; + + pi->lastword = *wp = + fake_page_ref(page); + pi->lastword = pi->lastword->next = + fake_word(page->number); + pi->lastword = pi->lastword->next = + fake_end_ref(); + } + + pi->lastpage = page; + } + } + goto nextword; } style = towordstyle(text->type); @@ -847,7 +1801,11 @@ static int render_text(page_data *page, para_data *pdata, int x, int y, } else if (type == word_WhiteSpace) { x += pdata->sizes[findex] * string_width(pdata->fonts[findex], L" ", NULL); - x += space_adjust; + if (nspaces && findex != FONT_CODE) { + x += (*nspace+1) * shortfall / nspaces; + x -= *nspace * shortfall / nspaces; + (*nspace)++; + } goto nextword; } else /* if (type == word_Quote) */ { if (text->aux == quote_Open) @@ -859,26 +1817,561 @@ static int render_text(page_data *page, para_data *pdata, int x, int y, (void) string_width(pdata->fonts[findex], str, &errs); if (errs && text->alt) - x = 
render_text(page, pdata, x, y, text->alt, NULL, space_adjust); + x = render_text(page, pdata, ldata, x, y, text->alt, NULL, + xr, shortfall, nspaces, nspace, keywords, idx); else x = render_string(page, pdata->fonts[findex], pdata->sizes[findex], x, y, str); + if (*xr) + (*xr)->rx = x; + nextword: - if (text == text_end) - break; text = text->next; } return x; } -static void render_line(line_data *ldata, int left_x, int top_y) +/* + * Returns the last x position used on the line. + */ +static int render_line(line_data *ldata, int left_x, int top_y, + xref_dest *dest, keywordlist *keywords, indexdata *idx) +{ + int nspace; + xref *xr; + int ret = 0; + + if (ldata->aux_text) { + int x; + xr = NULL; + nspace = 0; + x = render_text(ldata->page, ldata->pdata, ldata, + left_x + ldata->aux_left_indent, + top_y - ldata->ypos, + ldata->aux_text, NULL, &xr, 0, 0, &nspace, + keywords, idx); + if (ldata->aux_text_2) + render_text(ldata->page, ldata->pdata, ldata, + x, top_y - ldata->ypos, + ldata->aux_text_2, NULL, &xr, 0, 0, &nspace, + keywords, idx); + } + nspace = 0; + + if (ldata->first) { + /* + * There might be a cross-reference carried over from a + * previous line. + */ + if (dest->type != NONE) { + xr = mknew(xref); + xr->next = NULL; + xr->dest = *dest; /* structure copy */ + if (ldata->page->last_xref) + ldata->page->last_xref->next = xr; + else + ldata->page->first_xref = xr; + ldata->page->last_xref = xr; + xr->lx = xr->rx = left_x + ldata->xpos; + xr->by = top_y - ldata->ypos; + xr->ty = top_y - ldata->ypos + ldata->line_height; + } else + xr = NULL; + + { + int extra_indent, shortfall, spaces; + int just = ldata->pdata->justification; + + /* + * All forms of justification become JUST when we have + * to squeeze the paragraph. 
+ */ + if (ldata->hshortfall < 0) + just = JUST; + + switch (just) { + case JUST: + shortfall = ldata->hshortfall; + spaces = ldata->nspaces; + extra_indent = 0; + break; + case LEFT: + shortfall = spaces = extra_indent = 0; + break; + case RIGHT: + shortfall = spaces = 0; + extra_indent = ldata->real_shortfall; + break; + } + + ret = render_text(ldata->page, ldata->pdata, ldata, + left_x + ldata->xpos + extra_indent, + top_y - ldata->ypos, ldata->first, ldata->end, + &xr, shortfall, spaces, &nspace, + keywords, idx); + } + + if (xr) { + /* + * There's a cross-reference continued on to the next line. + */ + *dest = xr->dest; + } else + dest->type = NONE; + } + + return ret; +} + +static void render_para(para_data *pdata, paper_conf *conf, + keywordlist *keywords, indexdata *idx, + paragraph *index_placeholder, page_data *index_page) +{ + int last_x; + xref *cxref; + page_data *cxref_page; + xref_dest dest; + para_data *target; + line_data *ldata; + + dest.type = NONE; + cxref = NULL; + cxref_page = NULL; + + for (ldata = pdata->first; ldata; ldata = ldata->next) { + /* + * If this is a contents entry, we expect to have a single + * enormous cross-reference rectangle covering the whole + * thing. (Unless, of course, it spans multiple pages.) 
+ */ + if (pdata->contents_entry && ldata->page != cxref_page) { + cxref_page = ldata->page; + cxref = mknew(xref); + cxref->next = NULL; + cxref->dest.type = PAGE; + if (pdata->contents_entry == index_placeholder) { + cxref->dest.page = index_page; + } else { + assert(pdata->contents_entry->private_data); + target = (para_data *)pdata->contents_entry->private_data; + cxref->dest.page = target->first->page; + } + cxref->dest.url = NULL; + if (ldata->page->last_xref) + ldata->page->last_xref->next = cxref; + else + ldata->page->first_xref = cxref; + ldata->page->last_xref = cxref; + cxref->lx = conf->left_margin; + cxref->rx = conf->paper_width - conf->right_margin; + cxref->ty = conf->paper_height - conf->top_margin + - ldata->ypos + ldata->line_height; + } + if (pdata->contents_entry) { + assert(cxref != NULL); + cxref->by = conf->paper_height - conf->top_margin + - ldata->ypos; + } + + last_x = render_line(ldata, conf->left_margin, + conf->paper_height - conf->top_margin, + &dest, keywords, idx); + if (ldata == pdata->last) + break; + } + + /* + * If this is a contents entry, add leaders and a page + * number. 
+ */ + if (pdata->contents_entry) { + word *w; + wchar_t *num; + int wid; + int x; + + if (pdata->contents_entry == index_placeholder) { + num = index_page->number; + } else { + assert(pdata->contents_entry->private_data); + target = (para_data *)pdata->contents_entry->private_data; + num = target->first->page->number; + } + + w = fake_word(num); + wid = paper_width_simple(pdata, w); + sfree(w); + + for (x = 0; x < conf->base_width; x += conf->leader_separation) + if (x - conf->leader_separation > last_x - conf->left_margin && + x + conf->leader_separation < conf->base_width - wid) + render_string(pdata->last->page, + pdata->fonts[FONT_NORMAL], + pdata->sizes[FONT_NORMAL], + conf->left_margin + x, + (conf->paper_height - conf->top_margin - + pdata->last->ypos), L"."); + + render_string(pdata->last->page, + pdata->fonts[FONT_NORMAL], + pdata->sizes[FONT_NORMAL], + conf->paper_width - conf->right_margin - wid, + (conf->paper_height - conf->top_margin - + pdata->last->ypos), num); + } + + /* + * Render any rectangle (chapter title underline or rule) + * that goes with this paragraph. + */ + switch (pdata->rect_type) { + case RECT_CHAPTER_UNDERLINE: + add_rect_to_page(pdata->last->page, + conf->left_margin, + (conf->paper_height - conf->top_margin - + pdata->last->ypos - + conf->chapter_underline_depth), + conf->base_width, + conf->chapter_underline_thickness); + break; + case RECT_RULE: + add_rect_to_page(pdata->first->page, + conf->left_margin + pdata->first->xpos, + (conf->paper_height - conf->top_margin - + pdata->last->ypos - + pdata->last->line_height), + conf->base_width - pdata->first->xpos, + pdata->last->line_height); + break; + default: /* placate gcc */ + break; + } +} + +static para_data *code_paragraph(int indent, word *words, paper_conf *conf) +{ + para_data *pdata = mknew(para_data); + + /* + * For code paragraphs, I'm going to hack grievously and + * pretend the three normal fonts are the three code paragraph + * fonts. 
+ */ + pdata->fonts[FONT_NORMAL] = conf->cb; + pdata->fonts[FONT_EMPH] = conf->co; + pdata->fonts[FONT_CODE] = conf->cr; + pdata->sizes[FONT_NORMAL] = + pdata->sizes[FONT_EMPH] = + pdata->sizes[FONT_CODE] = 12; + + pdata->first = pdata->last = NULL; + pdata->outline_level = -1; + pdata->rect_type = RECT_NONE; + pdata->contents_entry = NULL; + pdata->justification = LEFT; + + for (; words; words = words->next) { + wchar_t *t, *e, *start; + word *lhead = NULL, *ltail = NULL, *w; + line_data *ldata; + int prev = -1, curr; + + t = words->text; + if (words->next && words->next->type == word_Emph) { + e = words->next->text; + words = words->next; + } else + e = NULL; + + start = t; + + while (*start) { + while (*t) { + if (!e || !*e) + curr = 0; + else if (*e == L'i') + curr = 1; + else if (*e == L'b') + curr = 2; + else + curr = 0; + + if (prev < 0) + prev = curr; + + if (curr != prev) + break; + + t++; + if (e && *e) + e++; + } + + /* + * We've isolated a maximal subsequence of the line + * which has the same emphasis. Form it into a word + * structure. + */ + w = mknew(word); + w->next = NULL; + w->alt = NULL; + w->type = (prev == 0 ? word_WeakCode : + prev == 1 ? 
word_Emph : word_Normal); + w->text = mknewa(wchar_t, t-start+1); + memcpy(w->text, start, (t-start) * sizeof(wchar_t)); + w->text[t-start] = '\0'; + w->breaks = FALSE; + + if (ltail) + ltail->next = w; + else + lhead = w; + ltail = w; + + start = t; + prev = -1; + } + + ldata = mknew(line_data); + + ldata->pdata = pdata; + ldata->first = lhead; + ldata->end = NULL; + ldata->line_height = conf->base_font_size * 4096; + + ldata->xpos = indent; + + if (pdata->last) { + pdata->last->next = ldata; + ldata->prev = pdata->last; + } else { + pdata->first = ldata; + ldata->prev = NULL; + } + ldata->next = NULL; + pdata->last = ldata; + + ldata->hshortfall = 0; + ldata->nspaces = 0; + ldata->aux_text = NULL; + ldata->aux_text_2 = NULL; + ldata->aux_left_indent = 0; + /* General opprobrium for breaking in a code paragraph. */ + ldata->penalty_before = ldata->penalty_after = 50000; + } + + standard_line_spacing(pdata, conf); + + return pdata; +} + +static para_data *rule_paragraph(int indent, paper_conf *conf) +{ + para_data *pdata = mknew(para_data); + line_data *ldata; + + ldata = mknew(line_data); + + ldata->pdata = pdata; + ldata->first = NULL; + ldata->end = NULL; + ldata->line_height = conf->rule_thickness; + + ldata->xpos = indent; + + ldata->prev = NULL; + ldata->next = NULL; + + ldata->hshortfall = 0; + ldata->nspaces = 0; + ldata->aux_text = NULL; + ldata->aux_text_2 = NULL; + ldata->aux_left_indent = 0; + + /* + * Better to break after a rule than before it + */ + ldata->penalty_after += 100000; + ldata->penalty_before += -100000; + + pdata->first = pdata->last = ldata; + pdata->outline_level = -1; + pdata->rect_type = RECT_RULE; + pdata->contents_entry = NULL; + pdata->justification = LEFT; + + standard_line_spacing(pdata, conf); + + return pdata; +} + +/* + * Plain-text-like formatting for outline titles. 
+ */ +static void paper_rdaddw(rdstring *rs, word *text) { + for (; text; text = text->next) switch (text->type) { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdadd(rs, L'_'); /* FIXME: configurability */ + else if (towordstyle(text->type) == word_Code && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdadd(rs, L'\''); /* FIXME: configurability */ + if (removeattr(text->type) == word_Normal) { + rdadds(rs, text->text); + } else if (removeattr(text->type) == word_WhiteSpace) { + rdadd(rs, L' '); + } else if (removeattr(text->type) == word_Quote) { + rdadd(rs, L'\''); /* fixme: configurability */ + } + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdadd(rs, L'_'); /* FIXME: configurability */ + else if (towordstyle(text->type) == word_Code && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdadd(rs, L'\''); /* FIXME: configurability */ + break; + } +} + +static wchar_t *prepare_outline_title(word *first, wchar_t *separator, + word *second) +{ + rdstring rs = {0, 0, NULL}; + + if (first) + paper_rdaddw(&rs, first); + if (separator) + rdadds(&rs, separator); + if (second) + paper_rdaddw(&rs, second); + + return rs.text; +} + +static word *fake_word(wchar_t *text) +{ + word *ret = mknew(word); + ret->next = NULL; + ret->alt = NULL; + ret->type = word_Normal; + ret->text = ustrdup(text); + 
ret->breaks = FALSE; + ret->aux = 0; + return ret; +} + +static word *fake_space_word(void) +{ + word *ret = mknew(word); + ret->next = NULL; + ret->alt = NULL; + ret->type = word_WhiteSpace; + ret->text = NULL; + ret->breaks = TRUE; + ret->aux = 0; + return ret; +} + +static word *fake_page_ref(page_data *page) +{ + word *ret = mknew(word); + ret->next = NULL; + ret->alt = NULL; + ret->type = word_PageXref; + ret->text = NULL; + ret->breaks = FALSE; + ret->aux = 0; + ret->private_data = page; + return ret; +} + +static word *fake_end_ref(void) +{ + word *ret = mknew(word); + ret->next = NULL; + ret->alt = NULL; + ret->type = word_XrefEnd; + ret->text = NULL; + ret->breaks = FALSE; + ret->aux = 0; + return ret; +} + +static word *prepare_contents_title(word *first, wchar_t *separator, + word *second) { - if (ldata->aux_text) - render_text(ldata->page, ldata->pdata, left_x + ldata->aux_left_indent, - top_y - ldata->ypos, ldata->aux_text, NULL, 0); - render_text(ldata->page, ldata->pdata, left_x + ldata->xpos, - top_y - ldata->ypos, ldata->first, ldata->last, - ldata->space_adjust); + word *ret; + word **wptr, *w; + + wptr = &ret; + + if (first) { + w = dup_word_list(first); + *wptr = w; + while (w->next) + w = w->next; + wptr = &w->next; + } + + if (separator) { + w = fake_word(separator); + *wptr = w; + wptr = &w->next; + } + + if (second) { + *wptr = dup_word_list(second); + } + + return ret; +} + +static void fold_into_page(page_data *dest, page_data *src, int right_shift) +{ + line_data *ldata; + + if (!src->first_line) + return; + + if (dest->last_line) { + dest->last_line->next = src->first_line; + src->first_line->prev = dest->last_line; + } + dest->last_line = src->last_line; + + for (ldata = src->first_line; ldata; ldata = ldata->next) { + ldata->page = dest; + ldata->xpos += right_shift; + + if (ldata == src->last_line) + break; + } }