2 * PDF backend for Halibut
9 #define TREE_BRANCH 2 /* max branching factor in page tree */
11 paragraph
*pdf_config_filename(char *filename
)
14 wchar_t *ufilename
, *up
;
18 memset(p
, 0, sizeof(*p
));
19 p
->type
= para_Config
;
21 p
->fpos
.filename
= "<command line>";
22 p
->fpos
.line
= p
->fpos
.col
= -1;
24 ufilename
= ufroma_dup(filename
);
25 len
= ustrlen(ufilename
) + 2 + lenof(L
"pdf-filename");
26 p
->keyword
= mknewa(wchar_t, len
);
28 ustrcpy(up
, L
"pdf-filename");
30 ustrcpy(up
, ufilename
);
33 assert(up
- p
->keyword
< len
);
39 typedef struct object_Tag object
;
40 typedef struct objlist_Tag objlist
;
46 rdstringc main
, stream
;
56 static object
*new_object(objlist
*list
);
57 static void objtext(object
*o
, char const *text
);
58 static void objstream(object
*o
, char const *text
);
59 static void objref(object
*o
, object
*dest
);
61 static void make_pages_node(object
*node
, object
*parent
, page_data
*first
,
62 page_data
*last
, object
*resources
);
63 static int make_outline(object
*parent
, outline_element
*start
, int n
,
66 void pdf_backend(paragraph
*sourceform
, keywordlist
*keywords
,
67 indexdata
*idx
, void *vdoc
) {
68 document
*doc
= (document
*)vdoc
;
77 object
*o
, *cat
, *outlines
, *pages
, *resources
;
83 filename
= dupstr("output.pdf");
84 for (p
= sourceform
; p
; p
= p
->next
) {
85 if (p
->type
== para_Config
&& p
->parent
) {
86 if (!ustricmp(p
->keyword
, L
"pdf-filename")) {
88 filename
= utoa_dup(uadv(p
->keyword
));
93 olist
.head
= olist
.tail
= NULL
;
96 cat
= new_object(&olist
);
97 outlines
= new_object(&olist
);
98 pages
= new_object(&olist
);
99 resources
= new_object(&olist
);
102 * The catalogue just contains references to the outlines and
105 objtext(cat
, "<<\n/Type /Catalog\n/Outlines ");
106 objref(cat
, outlines
);
107 objtext(cat
, "\n/Pages ");
109 objtext(cat
, "\n/PageMode /UseOutlines\n>>\n");
112 * Set up the resources dictionary, which mostly means
113 * providing all the font objects and names to call them by.
116 objtext(resources
, "<<\n/Font <<\n");
117 for (fe
= doc
->fonts
->head
; fe
; fe
= fe
->next
) {
122 sprintf(fname
, "f%d", font_index
++);
123 fe
->name
= dupstr(fname
);
125 font
= new_object(&olist
);
127 objtext(resources
, "/");
128 objtext(resources
, fe
->name
);
129 objtext(resources
, " ");
130 objref(resources
, font
);
131 objtext(resources
, "\n");
133 objtext(font
, "<<\n/Type /Font\n/Subtype /Type1\n/Name /");
134 objtext(font
, fe
->name
);
135 objtext(font
, "\n/BaseFont /");
136 objtext(font
, fe
->font
->name
);
137 objtext(font
, "\n/Encoding <<\n/Type /Encoding\n/Differences [");
139 for (i
= 0; i
< 256; i
++) {
143 sprintf(buf
, "\n%d /", i
);
145 objtext(font
, fe
->vector
[i
] ? fe
->vector
[i
] : ".notdef");
148 objtext(font
, "\n]\n>>\n");
151 object
*widths
= new_object(&olist
);
152 objtext(font
, "/FirstChar 0\n/LastChar 255\n/Widths ");
153 objref(font
, widths
);
155 objtext(widths
, "[\n");
156 for (i
= 0; i
< 256; i
++) {
159 if (fe
->indices
[i
] < 0)
162 width
= fe
->font
->widths
[fe
->indices
[i
]];
163 sprintf(buf
, "%g\n", 1000.0 * width
/ 4096.0);
164 objtext(widths
, buf
);
166 objtext(widths
, "]\n");
169 objtext(font
, ">>\n");
171 objtext(resources
, ">>\n>>\n");
174 * Define the page objects for each page, and get each one
175 * ready to have a `Parent' specification added to it.
177 for (page
= doc
->pages
; page
; page
= page
->next
) {
180 opage
= new_object(&olist
);
182 objtext(opage
, "<<\n/Type /Page\n");
186 * Recursively build the page tree.
188 make_pages_node(pages
, NULL
, doc
->pages
, NULL
, resources
);
191 * Create and render the individual pages.
194 for (page
= doc
->pages
; page
; page
= page
->next
) {
195 object
*opage
, *cstr
;
200 opage
= (object
*)page
->spare
;
202 * At this point the page dictionary is already
203 * half-written, with /Type and /Parent already present. We
204 * continue from there.
208 * The PDF spec says /Resources is required, but also says
209 * that it's inheritable and may be omitted if it's present
210 * in a Pages node. In our case it is: it's present in the
211 * topmost /Pages node because we carefully put it there.
212 * So we don't need a /Resources entry here.
214 sprintf(buf
, "/MediaBox [0 0 %g %g]\n",
215 doc
->paper_width
/ 4096.0, doc
->paper_height
/ 4096.0);
219 * Now we're ready to define a content stream containing
220 * the actual text on the page.
222 cstr
= new_object(&olist
);
223 objtext(opage
, "/Contents ");
225 objtext(opage
, "\n");
228 * Render any rectangles on the page.
230 for (r
= page
->first_rect
; r
; r
= r
->next
) {
232 sprintf(buf
, "%g %g %g %g re f\n", r
->x
/ 4096.0,
233 r
->y
/ 4096.0, r
->w
/ 4096.0, r
->h
/ 4096.0);
234 objstream(cstr
, buf
);
237 objstream(cstr
, "BT\n");
238 for (frag
= page
->first_text
; frag
; frag
= frag
->next
) {
241 objstream(cstr
, "/");
242 objstream(cstr
, frag
->fe
->name
);
243 sprintf(buf
, " %d Tf 1 0 0 1 %g %g Tm (", frag
->fontsize
,
244 frag
->x
/4096.0, frag
->y
/4096.0);
245 objstream(cstr
, buf
);
247 for (c
= frag
->text
; *c
; c
++) {
248 if (*c
== '(' || *c
== ')' || *c
== '\\')
249 objstream(cstr
, "\\");
252 objstream(cstr
, buf
);
255 objstream(cstr
, ") Tj\n");
257 objstream(cstr
, "ET");
260 * Also, we want an annotation dictionary containing the
261 * cross-references from this page.
263 if (page
->first_xref
) {
265 objtext(opage
, "/Annots [\n");
267 for (xr
= page
->first_xref
; xr
; xr
= xr
->next
) {
271 annot
= new_object(&olist
);
272 objref(opage
, annot
);
273 objtext(opage
, "\n");
275 objtext(annot
, "<<\n/Type /Annot\n/Subtype /Link\n/Rect [");
276 sprintf(buf
, "%g %g %g %g",
277 xr
->lx
/ 4096.0, xr
->by
/ 4096.0,
278 xr
->rx
/ 4096.0, xr
->ty
/ 4096.0);
280 objtext(annot
, "]\n/Border [0 0 0]\n");
282 if (xr
->dest
.type
== PAGE
) {
283 objtext(annot
, "/Dest [");
284 objref(annot
, (object
*)xr
->dest
.page
->spare
);
285 objtext(annot
, " /XYZ null null null]\n");
289 objtext(annot
, "/A <<\n/Type /Action\n/S /URI\n/URI (");
290 for (p
= xr
->dest
.url
; *p
; p
++) {
294 if (*p
== '(' || *p
== ')' || *p
== '\\')
295 objtext(annot
, "\\");
298 objtext(annot
, ")\n>>\n");
301 objtext(annot
, ">>\n");
304 objtext(opage
, "]\n");
307 objtext(opage
, ">>\n");
311 * Set up the outlines dictionary.
317 objtext(outlines
, "<<\n/Type /Outlines\n");
318 topcount
= make_outline(outlines
, doc
->outline_elements
,
319 doc
->n_outline_elements
, TRUE
);
320 sprintf(buf
, "/Count %d\n>>\n", topcount
);
321 objtext(outlines
, buf
);
325 * Assemble the final linear form of every object.
327 for (o
= olist
.head
; o
; o
= o
->next
) {
328 rdstringc rs
= {0, 0, NULL
};
331 sprintf(text
, "%d 0 obj\n", o
->number
);
334 if (!o
->main
.text
&& o
->stream
.text
) {
335 sprintf(text
, "<<\n/Length %d\n>>\n", o
->stream
.pos
);
336 rdaddsc(&o
->main
, text
);
339 assert(o
->main
.text
);
340 rdaddsc(&rs
, o
->main
.text
);
343 if (rs
.text
[rs
.pos
-1] != '\n')
346 if (o
->stream
.text
) {
348 * FIXME: If we ever start compressing stream data then
349 * it will have zero bytes in it, so we'll have to be
350 * more careful than this.
352 rdaddsc(&rs
, "stream\n");
353 rdaddsc(&rs
, o
->stream
.text
);
354 rdaddsc(&rs
, "\nendstream\n");
355 sfree(o
->stream
.text
);
358 rdaddsc(&rs
, "endobj\n");
365 * Write out the PDF file.
368 fp
= fopen(filename
, "wb");
370 error(err_cantopenw
, filename
);
377 fileoff
= fprintf(fp
, "%%PDF-1.3\n");
382 for (o
= olist
.head
; o
; o
= o
->next
) {
383 o
->fileoff
= fileoff
;
384 fwrite(o
->final
, 1, o
->size
, fp
);
389 * Cross-reference table
391 fprintf(fp
, "xref\n");
392 assert(olist
.head
->number
== 1);
393 fprintf(fp
, "0 %d\n", olist
.tail
->number
+ 1);
394 fprintf(fp
, "0000000000 65535 f \n");
395 for (o
= olist
.head
; o
; o
= o
->next
) {
397 sprintf(entry
, "%010d 00000 n \n", o
->fileoff
);
398 assert(strlen(entry
) == 20);
405 fprintf(fp
, "trailer\n<<\n/Size %d\n/Root %d 0 R\n>>\n",
406 olist
.tail
->number
+ 1, cat
->number
);
407 fprintf(fp
, "startxref\n%d\n%%%%EOF\n", fileoff
);
414 static object
*new_object(objlist
*list
)
416 object
*obj
= mknew(object
);
420 obj
->main
.text
= NULL
;
421 obj
->main
.pos
= obj
->main
.size
= 0;
422 obj
->stream
.text
= NULL
;
423 obj
->stream
.pos
= obj
->stream
.size
= 0;
425 obj
->number
= list
->number
++;
429 list
->tail
->next
= obj
;
440 static void objtext(object
*o
, char const *text
)
442 rdaddsc(&o
->main
, text
);
445 static void objstream(object
*o
, char const *text
)
447 rdaddsc(&o
->stream
, text
);
450 static void objref(object
*o
, object
*dest
)
453 sprintf(buf
, "%d 0 R", dest
->number
);
454 rdaddsc(&o
->main
, buf
);
457 static void make_pages_node(object
*node
, object
*parent
, page_data
*first
,
458 page_data
*last
, object
*resources
)
464 objtext(node
, "<<\n/Type /Pages\n");
466 objtext(node
, "/Parent ");
467 objref(node
, parent
);
472 * Count the pages in this stretch, to see if there are few
473 * enough to reference directly.
476 for (page
= first
; page
; page
= page
->next
) {
482 sprintf(buf
, "/Count %d\n/Kids [\n", count
);
485 if (count
> TREE_BRANCH
) {
487 page_data
*thisfirst
, *thislast
;
491 for (i
= 0; i
< TREE_BRANCH
; i
++) {
492 int number
= (i
+1) * count
/ TREE_BRANCH
- i
* count
/ TREE_BRANCH
;
499 if (thisfirst
== thislast
) {
500 objref(node
, (object
*)thisfirst
->spare
);
501 objtext((object
*)thisfirst
->spare
, "/Parent ");
502 objref((object
*)thisfirst
->spare
, node
);
503 objtext((object
*)thisfirst
->spare
, "\n");
505 object
*newnode
= new_object(node
->list
);
506 make_pages_node(newnode
, node
, thisfirst
, thislast
, NULL
);
507 objref(node
, newnode
);
512 assert(thislast
== last
|| page
== NULL
);
515 for (page
= first
; page
; page
= page
->next
) {
516 objref(node
, (object
*)page
->spare
);
518 objtext((object
*)page
->spare
, "/Parent ");
519 objref((object
*)page
->spare
, node
);
520 objtext((object
*)page
->spare
, "\n");
526 objtext(node
, "]\n");
529 objtext(node
, "/Resources ");
530 objref(node
, resources
);
534 objtext(node
, ">>\n");
538 * In text on the page, PDF uses the PostScript font model, which
539 * means that glyphs are identified by PS strings and hence font
540 * encoding can be managed independently of the supplied encoding
541 * of the font. However, in the document outline, the PDF spec
542 * simply asks for ordinary text strings without mentioning what
543 * character set they are supposed to be interpreted in.
545 * Therefore, for the moment, I'm going to assume they're US-ASCII
546 * only. If anyone knows better, they should let me know :-/
548 static int pdf_convert(wchar_t *s
, char **result
) {
549 int doing
= (result
!= 0);
552 int plen
= 0, psize
= 0;
558 if (c
>= 32 && c
<= 126) {
562 /* Char is not OK. */
564 outc
= 0xBF; /* approximate the good old DEC `uh?' */
569 p
= resize(p
, psize
);
575 p
= resize(p
, plen
+1);
582 static void pdf_rdaddwc(rdstringc
*rs
, word
*text
) {
585 for (; text
; text
= text
->next
) switch (text
->type
) {
598 case word_WhiteSpace
:
601 case word_WkCodeSpace
:
605 case word_WkCodeQuote
:
606 assert(text
->type
!= word_CodeQuote
&&
607 text
->type
!= word_WkCodeQuote
);
608 if (towordstyle(text
->type
) == word_Emph
&&
609 (attraux(text
->aux
) == attr_First
||
610 attraux(text
->aux
) == attr_Only
))
611 rdaddc(rs
, '_'); /* FIXME: configurability */
612 else if (towordstyle(text
->type
) == word_Code
&&
613 (attraux(text
->aux
) == attr_First
||
614 attraux(text
->aux
) == attr_Only
))
615 rdaddc(rs
, '\''); /* FIXME: configurability */
616 if (removeattr(text
->type
) == word_Normal
) {
617 if (pdf_convert(text
->text
, &c
))
620 pdf_rdaddwc(rs
, text
->alt
);
622 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
624 } else if (removeattr(text
->type
) == word_Quote
) {
625 rdaddc(rs
, '\''); /* FIXME: configurability */
627 if (towordstyle(text
->type
) == word_Emph
&&
628 (attraux(text
->aux
) == attr_Last
||
629 attraux(text
->aux
) == attr_Only
))
630 rdaddc(rs
, '_'); /* FIXME: configurability */
631 else if (towordstyle(text
->type
) == word_Code
&&
632 (attraux(text
->aux
) == attr_Last
||
633 attraux(text
->aux
) == attr_Only
))
634 rdaddc(rs
, '\''); /* FIXME: configurability */
639 static int make_outline(object
*parent
, outline_element
*items
, int n
,
642 int level
, totalcount
= 0;
643 outline_element
*itemp
;
644 object
*curr
, *prev
= NULL
, *first
= NULL
, *last
= NULL
;
649 level
= items
->level
;
652 rdstringc rs
= {0, 0, NULL
};
656 * Here we expect to be sitting on an item at the given
657 * level. So we start by constructing an outline entry for
660 assert(items
->level
== level
);
662 if (level
== 1 && items
->para
->kwtext
) {
663 pdf_rdaddwc(&rs
, items
->para
->kwtext
);
665 } else if (level
> 1 && items
->para
->kwtext2
) {
666 pdf_rdaddwc(&rs
, items
->para
->kwtext2
);
669 pdf_rdaddwc(&rs
, items
->para
->words
);
672 curr
= new_object(parent
->list
);
673 if (!first
) first
= curr
;
675 objtext(curr
, "<<\n/Title (");
676 for (p
= rs
.text
; p
< rs
.text
+rs
.pos
; p
++) {
678 if (*p
== '\\' || *p
== '(' || *p
== ')')
684 objtext(curr
, ")\n/Parent ");
685 objref(curr
, parent
);
686 objtext(curr
, "\n/Dest [");
687 pdata
= (para_data
*)items
->para
->private_data
;
688 objref(curr
, (object
*)pdata
->first
->page
->spare
);
689 objtext(curr
, " /XYZ null null null]\n");
691 objtext(curr
, "/Prev ");
695 objtext(prev
, "/Next ");
697 objtext(prev
, "\n>>\n");
702 for (itemp
= items
; itemp
< items
+n
&& itemp
->level
> level
;
707 int count
= make_outline(curr
, items
, itemp
- items
, FALSE
);
712 sprintf(buf
, "/Count %d\n", count
);
719 objtext(prev
, ">>\n");
721 assert(first
&& last
);
722 objtext(parent
, "/First ");
723 objref(parent
, first
);
724 objtext(parent
, "\n/Last ");
725 objref(parent
, last
);
726 objtext(parent
, "\n");