X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/17c71b41790ba139de121fb3e987532ce3979dff..c923eb7f496826aa29015bbef4e855ec42a948c7:/bk_pdf.c diff --git a/bk_pdf.c b/bk_pdf.c index 75d1f83..3a0ec75 100644 --- a/bk_pdf.c +++ b/bk_pdf.c @@ -5,8 +5,9 @@ #include #include "halibut.h" #include "paper.h" +#include "deflate.h" -#define TREE_BRANCH 2 /* max branching factor in page tree */ +#define TREE_BRANCH 8 /* max branching factor in page tree */ paragraph *pdf_config_filename(char *filename) { @@ -33,14 +34,19 @@ struct objlist_Tag { static object *new_object(objlist *list); static void objtext(object *o, char const *text); static void objstream(object *o, char const *text); +static void objstream_len(object *o, char const *text, size_t len); static void pdf_string(void (*add)(object *, char const *), object *, char const *); static void pdf_string_len(void (*add)(object *, char const *), object *, char const *, int); static void objref(object *o, object *dest); +static void objdest(object *o, page_data *p); + +static int is_std_font(char const *name); static void make_pages_node(object *node, object *parent, page_data *first, - page_data *last, object *resources); + page_data *last, object *resources, + object *mediabox); static int make_outline(object *parent, outline_element *start, int n, int open); static int pdf_versionid(FILE *fp, word *words); @@ -56,7 +62,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, char *filename; paragraph *p; objlist olist; - object *o, *cat, *outlines, *pages, *resources; + object *o, *info, *cat, *outlines, *pages, *resources, *mediabox; int fileoff; IGNORE(keywords); @@ -75,6 +81,29 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, olist.head = olist.tail = NULL; olist.number = 1; + { + char buf[256]; + + info = new_object(&olist); + objtext(info, "<<\n"); + if (doc->n_outline_elements > 0) { + char *title; + int titlelen; + + title = + pdf_outline_convert(doc->outline_elements->pdata->outline_title, + &titlelen); + objtext(info, "/Title "); + pdf_string_len(objtext, info, title, titlelen); + sfree(title); + objtext(info, "\n"); + } + objtext(info, "/Producer "); + sprintf(buf, "Halibut, %s", version); + pdf_string(objtext, info, buf); + objtext(info, "\n>>\n"); + } + cat = new_object(&olist); if (doc->n_outline_elements > 0) outlines = new_object(&olist); @@ -85,7 +114,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * The catalogue just contains references to the outlines and - * pages objects. + * pages objects, and the pagelabels dictionary. */ objtext(cat, "<<\n/Type /Catalog"); if (outlines) { @@ -94,6 +123,8 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, } objtext(cat, "\n/Pages "); objref(cat, pages); + /* Halibut just numbers pages 1, 2, 3, ... */ + objtext(cat, "\n/PageLabels<>]>>"); if (outlines) objtext(cat, "\n/PageMode /UseOutlines"); objtext(cat, "\n>>\n"); @@ -103,10 +134,10 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, * providing all the font objects and names to call them by. */ font_index = 0; - objtext(resources, "<<\n/Font <<\n"); + objtext(resources, "<<\n/ProcSet [/PDF/Text]\n/Font <<\n"); for (fe = doc->fonts->head; fe; fe = fe->next) { char fname[40]; - int i; + int i, prev; object *font; sprintf(fname, "f%d", font_index++); @@ -123,43 +154,123 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, objtext(font, "<<\n/Type /Font\n/Subtype /Type1\n/Name /"); objtext(font, fe->name); objtext(font, "\n/BaseFont /"); - objtext(font, fe->font->name); + objtext(font, fe->font->info->name); objtext(font, "\n/Encoding <<\n/Type /Encoding\n/Differences ["); for (i = 0; i < 256; i++) { char buf[20]; - if (!fe->vector[i]) + if (fe->vector[i] == NOGLYPH) continue; - sprintf(buf, "\n%d /", i); - objtext(font, buf); - objtext(font, fe->vector[i] ? fe->vector[i] : ".notdef"); + if (i != prev + 1) { + sprintf(buf, "\n%d", i); + objtext(font, buf); + } + objtext(font, i % 8 ? "/" : "\n/"); + objtext(font, glyph_extern(fe->vector[i])); + prev = i; } objtext(font, "\n]\n>>\n"); - { +#define FF_FIXEDPITCH 0x00000001 +#define FF_SERIF 0x00000002 +#define FF_SYMBOLIC 0x00000004 +#define FF_SCRIPT 0x00000008 +#define FF_NONSYMBOLIC 0x00000020 +#define FF_ITALIC 0x00000040 +#define FF_ALLCAP 0x00010000 +#define FF_SMALLCAP 0x00020000 +#define FF_FORCEBOLD 0x00040000 + + if (!is_std_font(fe->font->info->name)){ object *widths = new_object(&olist); - objtext(font, "/FirstChar 0\n/LastChar 255\n/Widths "); + object *fontdesc = new_object(&olist); + int firstchar = -1, lastchar = -1; + char buf[80]; + font_info const *fi = fe->font->info; + int flags; + for (i = 0; i < 256; i++) + if (fe->vector[i] != NOGLYPH) { + if (firstchar < 0) firstchar = i; + lastchar = i; + } + sprintf(buf, "/FirstChar %d\n/LastChar %d\n/Widths ", + firstchar, lastchar); + objtext(font, buf); objref(font, widths); objtext(font, "\n"); objtext(widths, "[\n"); - for (i = 0; i < 256; i++) { - char buf[80]; + for (i = firstchar; i <= lastchar; i++) { double width; - if (fe->indices[i] < 0) + if (fe->vector[i] == NOGLYPH) width = 0.0; else - width = fe->font->widths[fe->indices[i]]; + width = find_width(fe->font, fe->vector[i]); sprintf(buf, "%g\n", 1000.0 * width / FUNITS_PER_PT); objtext(widths, buf); } objtext(widths, "]\n"); + objtext(font, "/FontDescriptor "); + objref(font, fontdesc); + objtext(fontdesc, "<<\n/Type /FontDescriptor\n/Name /"); + objtext(fontdesc, fi->name); + flags = 0; + if (fi->italicangle) flags |= FF_ITALIC; + flags |= FF_NONSYMBOLIC; + sprintf(buf, "\n/Flags %d\n", flags); + objtext(fontdesc, buf); + sprintf(buf, "/FontBBox [%g %g %g %g]\n", fi->fontbbox[0], + fi->fontbbox[1], fi->fontbbox[2], fi->fontbbox[3]); + objtext(fontdesc, buf); + sprintf(buf, "/ItalicAngle %g\n", fi->italicangle); + objtext(fontdesc, buf); + sprintf(buf, "/Ascent %g\n", fi->ascent); + objtext(fontdesc, buf); + sprintf(buf, "/Descent %g\n", fi->descent); + objtext(fontdesc, buf); + sprintf(buf, "/CapHeight %g\n", fi->capheight); + objtext(fontdesc, buf); + sprintf(buf, "/XHeight %g\n", fi->xheight); + objtext(fontdesc, buf); + sprintf(buf, "/StemH %g\n", fi->stemh); + objtext(fontdesc, buf); + sprintf(buf, "/StemV %g\n", fi->stemv); + objtext(fontdesc, buf); + if (fi->fp) { + object *fontfile = new_object(&olist); + size_t len; + char *ffbuf; + + pf_part1((font_info *)fi, &ffbuf, &len); + objstream_len(fontfile, ffbuf, len); + sfree(ffbuf); + sprintf(buf, "<<\n/Length1 %lu\n", (unsigned long)len); + objtext(fontfile, buf); + pf_part2((font_info *)fi, &ffbuf, &len); + objstream_len(fontfile, ffbuf, len); + sfree(ffbuf); + sprintf(buf, "/Length2 %lu\n", (unsigned long)len); + objtext(fontfile, buf); + objtext(fontfile, "/Length3 0\n"); + objtext(fontdesc, "/FontFile "); + objref(fontdesc, fontfile); + } + objtext(fontdesc, "\n>>\n"); } - objtext(font, ">>\n"); + objtext(font, "\n>>\n"); } objtext(resources, ">>\n>>\n"); + { + char buf[255]; + mediabox = new_object(&olist); + sprintf(buf, "[0 0 %g %g]\n", + doc->paper_width / FUNITS_PER_PT, + doc->paper_height / FUNITS_PER_PT); + objtext(mediabox, buf); + } + /* * Define the page objects for each page, and get each one * ready to have a `Parent' specification added to it. @@ -175,7 +286,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * Recursively build the page tree. */ - make_pages_node(pages, NULL, doc->pages, NULL, resources); + make_pages_node(pages, NULL, doc->pages, NULL, resources, mediabox); /* * Create and render the individual pages. @@ -200,12 +311,9 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, * that it's inheritable and may be omitted if it's present * in a Pages node. In our case it is: it's present in the * topmost /Pages node because we carefully put it there. - * So we don't need a /Resources entry here. + * So we don't need a /Resources entry here. The same applies + * to /MediaBox. */ - sprintf(buf, "/MediaBox [0 0 %g %g]\n", - doc->paper_width / FUNITS_PER_PT, - doc->paper_height / FUNITS_PER_PT); - objtext(opage, buf); /* * Now we're ready to define a content stream containing @@ -324,31 +432,25 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, objtext(opage, "/Annots [\n"); for (xr = page->first_xref; xr; xr = xr->next) { - object *annot; char buf[256]; - annot = new_object(&olist); - objref(opage, annot); - objtext(opage, "\n"); - - objtext(annot, "<<\n/Type /Annot\n/Subtype /Link\n/Rect ["); + objtext(opage, "<lx / FUNITS_PER_PT, xr->by / FUNITS_PER_PT, xr->rx / FUNITS_PER_PT, xr->ty / FUNITS_PER_PT); - objtext(annot, buf); - objtext(annot, "]\n/Border [0 0 0]\n"); + objtext(opage, buf); + objtext(opage, "]/Border[0 0 0]\n"); if (xr->dest.type == PAGE) { - objtext(annot, "/Dest ["); - objref(annot, (object *)xr->dest.page->spare); - objtext(annot, " /XYZ null null null]\n"); + objtext(opage, "/Dest"); + objdest(opage, xr->dest.page); } else { - objtext(annot, "/A <<\n/Type /Action\n/S /URI\n/URI "); - pdf_string(objtext, annot, xr->dest.url); - objtext(annot, "\n>>\n"); + objtext(opage, "/A<dest.url); + objtext(opage, ">>"); } - objtext(annot, ">>\n"); + objtext(opage, ">>\n"); } objtext(opage, "]\n"); @@ -377,12 +479,21 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, for (o = olist.head; o; o = o->next) { rdstringc rs = {0, 0, NULL}; char text[80]; + deflate_compress_ctx *zcontext; + void *zbuf; + int zlen; sprintf(text, "%d 0 obj\n", o->number); rdaddsc(&rs, text); - if (!o->main.text && o->stream.text) { - sprintf(text, "<<\n/Length %d\n>>\n", o->stream.pos); + if (o->stream.text) { + zcontext = deflate_compress_new(DEFLATE_TYPE_ZLIB); + deflate_compress_data(zcontext, o->stream.text, o->stream.pos, + DEFLATE_END_OF_DATA, &zbuf, &zlen); + deflate_compress_free(zcontext); + if (!o->main.text) + rdaddsc(&o->main, "<<\n"); + sprintf(text, "/Filter/FlateDecode\n/Length %d\n>>\n", zlen); rdaddsc(&o->main, text); } @@ -394,15 +505,11 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, rdaddc(&rs, '\n'); if (o->stream.text) { - /* - * FIXME: If we ever start compressing stream data then - * it will have zero bytes in it, so we'll have to be - * more careful than this. - */ rdaddsc(&rs, "stream\n"); - rdaddsc(&rs, o->stream.text); + rdaddsn(&rs, zbuf, zlen); rdaddsc(&rs, "\nendstream\n"); sfree(o->stream.text); + sfree(zbuf); } rdaddsc(&rs, "endobj\n"); @@ -423,9 +530,11 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * Header. I'm going to put the version IDs in the header as - * well, simply in PDF comments. + * well, simply in PDF comments. The PDF Reference also suggests + * that binary PDF files contain four top-bit-set characters in + * the second line. */ - fileoff = fprintf(fp, "%%PDF-1.3\n"); + fileoff = fprintf(fp, "%%PDF-1.3\n%% L\xc3\xba\xc3\xb0""a\n"); for (p = sourceform; p; p = p->next) if (p->type == para_VersionID) fileoff += pdf_versionid(fp, p->words); @@ -456,8 +565,8 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * Trailer */ - fprintf(fp, "trailer\n<<\n/Size %d\n/Root %d 0 R\n>>\n", - olist.tail->number + 1, cat->number); + fprintf(fp, "trailer\n<<\n/Size %d\n/Root %d 0 R\n/Info %d 0 R\n>>\n", + olist.tail->number + 1, cat->number, info->number); fprintf(fp, "startxref\n%d\n%%%%EOF\n", fileoff); fclose(fp); @@ -496,6 +605,11 @@ static void objtext(object *o, char const *text) rdaddsc(&o->main, text); } +static void objstream_len(object *o, char const *text, size_t len) +{ + rdaddsn(&o->stream, text, len); +} + static void objstream(object *o, char const *text) { rdaddsc(&o->stream, text); @@ -508,8 +622,30 @@ static void objref(object *o, object *dest) rdaddsc(&o->main, buf); } +static void objdest(object *o, page_data *p) { + objtext(o, "["); + objref(o, (object *)p->spare); + objtext(o, "/XYZ null null null]"); +} + +static char const * const stdfonts[] = { + "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic", + "Helvetica", "Helvetica-Bold", "Helvetica-Oblique","Helvetica-BoldOblique", + "Courier", "Courier-Bold", "Courier-Oblique", "Courier-BoldOblique", + "Symbol", "ZapfDingbats" +}; + +static int is_std_font(char const *name) { + unsigned i; + for (i = 0; i < lenof(stdfonts); i++) + if (strcmp(name, stdfonts[i]) == 0) + return TRUE; + return FALSE; +} + static void make_pages_node(object *node, object *parent, page_data *first, - page_data *last, object *resources) + page_data *last, object *resources, + object *mediabox) { int count; page_data *page; @@ -557,7 +693,8 @@ static void make_pages_node(object *node, object *parent, page_data *first, objtext((object *)thisfirst->spare, "\n"); } else { object *newnode = new_object(node->list); - make_pages_node(newnode, node, thisfirst, thislast, NULL); + make_pages_node(newnode, node, thisfirst, thislast, + NULL, NULL); objref(node, newnode); } objtext(node, "\n"); @@ -584,6 +721,11 @@ static void make_pages_node(object *node, object *parent, page_data *first, objref(node, resources); objtext(node, "\n"); } + if (mediabox) { + objtext(node, "/MediaBox "); + objref(node, mediabox); + objtext(node, "\n"); + } objtext(node, ">>\n"); } @@ -596,7 +738,7 @@ static void make_pages_node(object *node, object *parent, page_data *first, * encodes in either PDFDocEncoding (a custom superset of * ISO-8859-1) or UTF-16BE. */ -static char *pdf_outline_convert(wchar_t *s, int *len) { +char *pdf_outline_convert(wchar_t *s, int *len) { char *ret; ret = utoa_careful_dup(s, CS_PDF); @@ -651,11 +793,12 @@ static int make_outline(object *parent, outline_element *items, int n, last = curr; objtext(curr, "<<\n/Title "); pdf_string_len(objtext, curr, title, titlelen); + sfree(title); objtext(curr, "\n/Parent "); objref(curr, parent); - objtext(curr, "\n/Dest ["); - objref(curr, (object *)items->pdata->first->page->spare); - objtext(curr, " /XYZ null null null]\n"); + objtext(curr, "\n/Dest"); + objdest(curr, items->pdata->first->page); + objtext(curr, "\n"); if (prev) { objtext(curr, "/Prev "); objref(curr, prev);