X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/993ade7a1fcb7b4b77c1011348df290822e26a5e..2729eafc11bc11588764711335273a15d8cc226c:/bk_pdf.c diff --git a/bk_pdf.c b/bk_pdf.c index 8eda320..74944f0 100644 --- a/bk_pdf.c +++ b/bk_pdf.c @@ -5,8 +5,9 @@ #include #include "halibut.h" #include "paper.h" +#include "deflate.h" -#define TREE_BRANCH 2 /* max branching factor in page tree */ +#define TREE_BRANCH 8 /* max branching factor in page tree */ paragraph *pdf_config_filename(char *filename) { @@ -33,12 +34,15 @@ struct objlist_Tag { static object *new_object(objlist *list); static void objtext(object *o, char const *text); static void objstream(object *o, char const *text); +static void objstream_len(object *o, char const *text, size_t len); static void pdf_string(void (*add)(object *, char const *), object *, char const *); static void pdf_string_len(void (*add)(object *, char const *), object *, char const *, int); static void objref(object *o, object *dest); -static char *pdf_outline_convert(wchar_t *s, int *len); +static void objdest(object *o, page_data *p); + +static int is_std_font(char const *name); static void make_pages_node(object *node, object *parent, page_data *first, page_data *last, object *resources, @@ -110,7 +114,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * The catalogue just contains references to the outlines and - * pages objects. + * pages objects, and the pagelabels dictionary. */ objtext(cat, "<<\n/Type /Catalog"); if (outlines) { @@ -119,6 +123,8 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, } objtext(cat, "\n/Pages "); objref(cat, pages); + /* Halibut just numbers pages 1, 2, 3, ... */ + objtext(cat, "\n/PageLabels<>]>>"); if (outlines) objtext(cat, "\n/PageMode /UseOutlines"); objtext(cat, "\n>>\n"); @@ -128,10 +134,10 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, * providing all the font objects and names to call them by. */ font_index = 0; - objtext(resources, "<<\n/Font <<\n"); + objtext(resources, "<<\n/ProcSet [/PDF/Text]\n/Font <<\n"); for (fe = doc->fonts->head; fe; fe = fe->next) { char fname[40]; - int i; + int i, prev; object *font; sprintf(fname, "f%d", font_index++); @@ -148,40 +154,111 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, objtext(font, "<<\n/Type /Font\n/Subtype /Type1\n/Name /"); objtext(font, fe->name); objtext(font, "\n/BaseFont /"); - objtext(font, fe->font->name); + objtext(font, fe->font->info->name); objtext(font, "\n/Encoding <<\n/Type /Encoding\n/Differences ["); for (i = 0; i < 256; i++) { char buf[20]; - if (!fe->vector[i]) + if (fe->vector[i] == NOGLYPH) continue; - sprintf(buf, "\n%d /", i); - objtext(font, buf); - objtext(font, fe->vector[i] ? fe->vector[i] : ".notdef"); + if (i != prev + 1) { + sprintf(buf, "\n%d", i); + objtext(font, buf); + } + objtext(font, i % 8 ? "/" : "\n/"); + objtext(font, glyph_extern(fe->vector[i])); + prev = i; } objtext(font, "\n]\n>>\n"); - { +#define FF_FIXEDPITCH 0x00000001 +#define FF_SERIF 0x00000002 +#define FF_SYMBOLIC 0x00000004 +#define FF_SCRIPT 0x00000008 +#define FF_NONSYMBOLIC 0x00000020 +#define FF_ITALIC 0x00000040 +#define FF_ALLCAP 0x00010000 +#define FF_SMALLCAP 0x00020000 +#define FF_FORCEBOLD 0x00040000 + + if (!is_std_font(fe->font->info->name)){ object *widths = new_object(&olist); - objtext(font, "/FirstChar 0\n/LastChar 255\n/Widths "); + object *fontdesc = new_object(&olist); + int firstchar = -1, lastchar = -1; + char buf[80]; + font_info const *fi = fe->font->info; + int flags; + for (i = 0; i < 256; i++) + if (fe->vector[i] != NOGLYPH) { + if (firstchar < 0) firstchar = i; + lastchar = i; + } + sprintf(buf, "/FirstChar %d\n/LastChar %d\n/Widths ", + firstchar, lastchar); + objtext(font, buf); objref(font, widths); objtext(font, "\n"); objtext(widths, "[\n"); - for (i = 0; i < 256; i++) { - char buf[80]; + for (i = firstchar; i <= lastchar; i++) { double width; - if (fe->indices[i] < 0) + if (fe->vector[i] == NOGLYPH) width = 0.0; else - width = fe->font->widths[fe->indices[i]]; + width = find_width(fe->font, fe->vector[i]); sprintf(buf, "%g\n", 1000.0 * width / FUNITS_PER_PT); objtext(widths, buf); } objtext(widths, "]\n"); + objtext(font, "/FontDescriptor "); + objref(font, fontdesc); + objtext(fontdesc, "<<\n/Type /FontDescriptor\n/Name /"); + objtext(fontdesc, fi->name); + flags = 0; + if (fi->italicangle) flags |= FF_ITALIC; + flags |= FF_NONSYMBOLIC; + sprintf(buf, "\n/Flags %d\n", flags); + objtext(fontdesc, buf); + sprintf(buf, "/FontBBox [%g %g %g %g]\n", fi->fontbbox[0], + fi->fontbbox[1], fi->fontbbox[2], fi->fontbbox[3]); + objtext(fontdesc, buf); + sprintf(buf, "/ItalicAngle %g\n", fi->italicangle); + objtext(fontdesc, buf); + sprintf(buf, "/Ascent %g\n", fi->ascent); + objtext(fontdesc, buf); + sprintf(buf, "/Descent %g\n", fi->descent); + objtext(fontdesc, buf); + sprintf(buf, "/CapHeight %g\n", fi->capheight); + objtext(fontdesc, buf); + sprintf(buf, "/XHeight %g\n", fi->xheight); + objtext(fontdesc, buf); + sprintf(buf, "/StemH %g\n", fi->stemh); + objtext(fontdesc, buf); + sprintf(buf, "/StemV %g\n", fi->stemv); + objtext(fontdesc, buf); + if (fi->fontfile && fi->filetype == TYPE1) { + object *fontfile = new_object(&olist); + size_t len; + char *ffbuf; + + pf_part1((font_info *)fi, &ffbuf, &len); + objstream_len(fontfile, ffbuf, len); + sfree(ffbuf); + sprintf(buf, "<<\n/Length1 %lu\n", (unsigned long)len); + objtext(fontfile, buf); + pf_part2((font_info *)fi, &ffbuf, &len); + objstream_len(fontfile, ffbuf, len); + sfree(ffbuf); + sprintf(buf, "/Length2 %lu\n", (unsigned long)len); + objtext(fontfile, buf); + objtext(fontfile, "/Length3 0\n"); + objtext(fontdesc, "/FontFile "); + objref(fontdesc, fontfile); + } + objtext(fontdesc, "\n>>\n"); } - objtext(font, ">>\n"); + objtext(font, "\n>>\n"); } objtext(resources, ">>\n>>\n"); @@ -355,31 +432,25 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, objtext(opage, "/Annots [\n"); for (xr = page->first_xref; xr; xr = xr->next) { - object *annot; char buf[256]; - annot = new_object(&olist); - objref(opage, annot); - objtext(opage, "\n"); - - objtext(annot, "<<\n/Type /Annot\n/Subtype /Link\n/Rect ["); + objtext(opage, "<lx / FUNITS_PER_PT, xr->by / FUNITS_PER_PT, xr->rx / FUNITS_PER_PT, xr->ty / FUNITS_PER_PT); - objtext(annot, buf); - objtext(annot, "]\n/Border [0 0 0]\n"); + objtext(opage, buf); + objtext(opage, "]/Border[0 0 0]\n"); if (xr->dest.type == PAGE) { - objtext(annot, "/Dest ["); - objref(annot, (object *)xr->dest.page->spare); - objtext(annot, " /XYZ null null null]\n"); + objtext(opage, "/Dest"); + objdest(opage, xr->dest.page); } else { - objtext(annot, "/A <<\n/Type /Action\n/S /URI\n/URI "); - pdf_string(objtext, annot, xr->dest.url); - objtext(annot, "\n>>\n"); + objtext(opage, "/A<dest.url); + objtext(opage, ">>"); } - objtext(annot, ">>\n"); + objtext(opage, ">>\n"); } objtext(opage, "]\n"); @@ -408,12 +479,28 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, for (o = olist.head; o; o = o->next) { rdstringc rs = {0, 0, NULL}; char text[80]; + deflate_compress_ctx *zcontext; + void *zbuf; + int zlen; sprintf(text, "%d 0 obj\n", o->number); rdaddsc(&rs, text); - if (!o->main.text && o->stream.text) { - sprintf(text, "<<\n/Length %d\n>>\n", o->stream.pos); + if (o->stream.text) { + if (!o->main.text) + rdaddsc(&o->main, "<<\n"); +#ifdef PDF_NOCOMPRESS + zlen = o->stream.pos; + zbuf = snewn(zlen, char); + memcpy(zbuf, o->stream.text, zlen); + sprintf(text, "/Length %d\n>>\n", zlen); +#else + zcontext = deflate_compress_new(DEFLATE_TYPE_ZLIB); + deflate_compress_data(zcontext, o->stream.text, o->stream.pos, + DEFLATE_END_OF_DATA, &zbuf, &zlen); + deflate_compress_free(zcontext); + sprintf(text, "/Filter/FlateDecode\n/Length %d\n>>\n", zlen); +#endif rdaddsc(&o->main, text); } @@ -425,15 +512,11 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, rdaddc(&rs, '\n'); if (o->stream.text) { - /* - * FIXME: If we ever start compressing stream data then - * it will have zero bytes in it, so we'll have to be - * more careful than this. - */ rdaddsc(&rs, "stream\n"); - rdaddsc(&rs, o->stream.text); + rdaddsn(&rs, zbuf, zlen); rdaddsc(&rs, "\nendstream\n"); sfree(o->stream.text); + sfree(zbuf); } rdaddsc(&rs, "endobj\n"); @@ -454,9 +537,11 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * Header. I'm going to put the version IDs in the header as - * well, simply in PDF comments. + * well, simply in PDF comments. The PDF Reference also suggests + * that binary PDF files contain four top-bit-set characters in + * the second line. */ - fileoff = fprintf(fp, "%%PDF-1.3\n"); + fileoff = fprintf(fp, "%%PDF-1.3\n%% L\xc3\xba\xc3\xb0""a\n"); for (p = sourceform; p; p = p->next) if (p->type == para_VersionID) fileoff += pdf_versionid(fp, p->words); @@ -527,6 +612,11 @@ static void objtext(object *o, char const *text) rdaddsc(&o->main, text); } +static void objstream_len(object *o, char const *text, size_t len) +{ + rdaddsn(&o->stream, text, len); +} + static void objstream(object *o, char const *text) { rdaddsc(&o->stream, text); @@ -539,6 +629,27 @@ static void objref(object *o, object *dest) rdaddsc(&o->main, buf); } +static void objdest(object *o, page_data *p) { + objtext(o, "["); + objref(o, (object *)p->spare); + objtext(o, "/XYZ null null null]"); +} + +static char const * const stdfonts[] = { + "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic", + "Helvetica", "Helvetica-Bold", "Helvetica-Oblique","Helvetica-BoldOblique", + "Courier", "Courier-Bold", "Courier-Oblique", "Courier-BoldOblique", + "Symbol", "ZapfDingbats" +}; + +static int is_std_font(char const *name) { + unsigned i; + for (i = 0; i < lenof(stdfonts); i++) + if (strcmp(name, stdfonts[i]) == 0) + return TRUE; + return FALSE; +} + static void make_pages_node(object *node, object *parent, page_data *first, page_data *last, object *resources, object *mediabox) @@ -634,7 +745,7 @@ static void make_pages_node(object *node, object *parent, page_data *first, * encodes in either PDFDocEncoding (a custom superset of * ISO-8859-1) or UTF-16BE. */ -static char *pdf_outline_convert(wchar_t *s, int *len) { +char *pdf_outline_convert(wchar_t *s, int *len) { char *ret; ret = utoa_careful_dup(s, CS_PDF); @@ -692,9 +803,9 @@ static int make_outline(object *parent, outline_element *items, int n, sfree(title); objtext(curr, "\n/Parent "); objref(curr, parent); - objtext(curr, "\n/Dest ["); - objref(curr, (object *)items->pdata->first->page->spare); - objtext(curr, " /XYZ null null null]\n"); + objtext(curr, "\n/Dest"); + objdest(curr, items->pdata->first->page); + objtext(curr, "\n"); if (prev) { objtext(curr, "/Prev "); objref(curr, prev);