2 * PDF backend for Halibut
9 #define TREE_BRANCH 2 /* max branching factor in page tree */
/*
 * pdf_config_filename: build a configuration paragraph that names the
 * PDF output file.
 *
 * The paragraph is zero-filled, typed as para_Config, and given a file
 * position of "<command line>" with line and column both set to -1.
 * Its keyword buffer is allocated large enough for the wide string
 * "pdf-filename" plus the wide-character form of `filename'.
 *
 * NOTE(review): the declaration of `p' and `len', the statements that
 * advance `up' between the two ustrcpy calls, and the return statement
 * are not visible in this extract; presumably `p' is returned -- confirm
 * against the full source.
 */
11 paragraph
*pdf_config_filename(char *filename
)
14 wchar_t *ufilename
, *up
;
18 memset(p
, 0, sizeof(*p
));
19 p
->type
= para_Config
;
21 p
->fpos
.filename
= "<command line>";
22 p
->fpos
.line
= p
->fpos
.col
= -1;
/*
 * Convert the filename to a wide string and size the keyword buffer:
 * filename length, plus the length of "pdf-filename", plus 2
 * (presumably one terminator per string -- TODO confirm lenof()).
 */
24 ufilename
= ufroma_dup(filename
);
25 len
= ustrlen(ufilename
) + 2 + lenof(L
"pdf-filename");
26 p
->keyword
= mknewa(wchar_t, len
);
28 ustrcpy(up
, L
"pdf-filename");
30 ustrcpy(up
, ufilename
);
/* Sanity check: the write pointer must still be inside the buffer. */
33 assert(up
- p
->keyword
< len
);
/*
 * Short type names for the two structures this file works with:
 * `object' (struct object_Tag) and `objlist' (struct objlist_Tag).
 * The functions below create objects on an objlist and refer to them
 * by their `number' field.
 */
39 typedef struct object_Tag object
;
40 typedef struct objlist_Tag objlist
;
46 rdstringc main
, stream
;
/*
 * Forward declarations of the file-local helpers defined after
 * pdf_backend().
 */
/* Create a new object, numbered from list->number. */
56 static object
*new_object(objlist
*list
);
/* Pass `text' to rdaddsc() on the object's main text. */
57 static void objtext(object
*o
, char const *text
);
/* Pass `text' to rdaddsc() on the object's stream text. */
58 static void objstream(object
*o
, char const *text
);
/* Write a "<number> 0 R" reference to `dest' into o's main text. */
59 static void objref(object
*o
, object
*dest
);
/* Write a /Pages tree node covering a run of pages. */
61 static void make_pages_node(object
*node
, object
*parent
, page_data
*first
,
62 page_data
*last
, object
*resources
);
/*
 * pdf_backend: write the laid-out document `vdoc' as a PDF file.
 *
 * Parameters:
 *   sourceform - paragraph list; scanned here for a "pdf-filename"
 *                configuration paragraph, and every paragraph has its
 *                private_data cleared.
 *   keywords, idx - not referenced in the lines visible in this extract.
 *   vdoc       - cast to `document *'; supplies fonts, pages and the
 *                paper dimensions.
 *
 * Visible stages, in order:
 *   1. Choose the output filename (default "output.pdf").
 *   2. Create the catalogue, outlines, pages and resources objects.
 *   3. Emit one font object plus one widths object per font entry.
 *   4. Create a page object per page and build the page tree.
 *   5. Fill in each page: MediaBox, content stream (rectangles, then
 *      text fragments), and link annotations for cross-references.
 *   6. Assemble each object into its final byte form.
 *   7. Write the header, objects, xref table and trailer to the file.
 *
 * Coordinates and sizes taken from the document are divided by 4096.0
 * before being printed.
 *
 * NOTE(review): many lines of this function (local declarations, some
 * conditions and closing braces) are missing from this extract, so the
 * comments below describe only what the visible lines show.
 */
64 void pdf_backend(paragraph
*sourceform
, keywordlist
*keywords
,
65 indexdata
*idx
, void *vdoc
) {
66 document
*doc
= (document
*)vdoc
;
75 object
*o
, *cat
, *outlines
, *pages
, *resources
;
/*
 * Default output name; a para_Config paragraph with a parent and the
 * keyword "pdf-filename" replaces it with the string following the
 * keyword.
 */
81 filename
= dupstr("output.pdf");
82 for (p
= sourceform
; p
; p
= p
->next
) {
83 p
->private_data
= NULL
;
84 if (p
->type
== para_Config
&& p
->parent
) {
85 if (!ustricmp(p
->keyword
, L
"pdf-filename")) {
87 filename
= utoa_dup(uadv(p
->keyword
));
92 olist
.head
= olist
.tail
= NULL
;
/* The four fixed top-level objects are created first, catalogue first. */
95 cat
= new_object(&olist
);
96 outlines
= new_object(&olist
);
97 pages
= new_object(&olist
);
98 resources
= new_object(&olist
);
101 * We currently don't support outlines, so here's a null
102 * outlines dictionary.
/*
 * NOTE(review): the dictionary below says "/Type Outlines" with no
 * leading slash on the name, unlike "/Type /Catalog" and the other
 * /Type entries in this file. This looks like a typo for
 * "/Type /Outlines" -- confirm against the PDF reference.
 */
104 objtext(outlines
, "<<\n/Type Outlines\n/Count 0\n>>\n");
107 * The catalogue just contains references to the outlines and
110 objtext(cat
, "<<\n/Type /Catalog\n/Outlines ");
111 objref(cat
, outlines
);
112 objtext(cat
, "\n/Pages ");
114 objtext(cat
, "\n>>\n");
117 * Set up the resources dictionary, which mostly means
118 * providing all the font objects and names to call them by.
121 objtext(resources
, "<<\n/Font <<\n");
/*
 * One pass per font entry: name it "f<index>", list it in the
 * resources dictionary, then write its font object with a 256-entry
 * /Differences encoding and a separate 256-entry widths array.
 */
122 for (fe
= doc
->fonts
->head
; fe
; fe
= fe
->next
) {
127 sprintf(fname
, "f%d", font_index
++);
128 fe
->name
= dupstr(fname
);
130 font
= new_object(&olist
);
132 objtext(resources
, "/");
133 objtext(resources
, fe
->name
);
134 objtext(resources
, " ");
135 objref(resources
, font
);
136 objtext(resources
, "\n");
138 objtext(font
, "<<\n/Type /Font\n/Subtype /Type1\n/Name /");
139 objtext(font
, fe
->name
);
140 objtext(font
, "\n/BaseFont /");
141 objtext(font
, fe
->font
->name
);
142 objtext(font
, "\n/Encoding <<\n/Type /Encoding\n/Differences [");
/* Slots with no glyph name in fe->vector are written as ".notdef". */
144 for (i
= 0; i
< 256; i
++) {
148 sprintf(buf
, "\n%d /", i
);
150 objtext(font
, fe
->vector
[i
] ? fe
->vector
[i
] : ".notdef");
153 objtext(font
, "\n]\n>>\n");
156 object
*widths
= new_object(&olist
);
157 objtext(font
, "/FirstChar 0\n/LastChar 255\n/Widths ");
158 objref(font
, widths
);
160 objtext(widths
, "[\n");
/*
 * Widths are printed as 1000.0 * width / 4096.0. The branch taken
 * when fe->indices[i] is negative is not visible in this extract.
 */
161 for (i
= 0; i
< 256; i
++) {
164 if (fe
->indices
[i
] < 0)
167 width
= fe
->font
->widths
[fe
->indices
[i
]];
168 sprintf(buf
, "%g\n", 1000.0 * width
/ 4096.0);
169 objtext(widths
, buf
);
171 objtext(widths
, "]\n");
174 objtext(font
, ">>\n");
176 objtext(resources
, ">>\n>>\n");
179 * Define the page objects for each page, and get each one
180 * ready to have a `Parent' specification added to it.
182 for (page
= doc
->pages
; page
; page
= page
->next
) {
185 opage
= new_object(&olist
);
187 objtext(opage
, "<<\n/Type /Page\n");
191 * Recursively build the page tree.
193 make_pages_node(pages
, NULL
, doc
->pages
, NULL
, resources
);
196 * Create and render the individual pages.
199 for (page
= doc
->pages
; page
; page
= page
->next
) {
200 object
*opage
, *cstr
;
/* The page's PDF object is recovered from page->spare. */
205 opage
= (object
*)page
->spare
;
207 * At this point the page dictionary is already
208 * half-written, with /Type and /Parent already present. We
209 * continue from there.
213 * The PDF spec says /Resources is required, but also says
214 * that it's inheritable and may be omitted if it's present
215 * in a Pages node. In our case it is: it's present in the
216 * topmost /Pages node because we carefully put it there.
217 * So we don't need a /Resources entry here.
219 sprintf(buf
, "/MediaBox [0 0 %g %g]\n",
220 doc
->paper_width
/ 4096.0, doc
->paper_height
/ 4096.0);
224 * Now we're ready to define a content stream containing
225 * the actual text on the page.
227 cstr
= new_object(&olist
);
228 objtext(opage
, "/Contents ");
230 objtext(opage
, "\n");
233 * Render any rectangles on the page.
235 for (r
= page
->first_rect
; r
; r
= r
->next
) {
237 sprintf(buf
, "%g %g %g %g re f\n", r
->x
/ 4096.0,
238 r
->y
/ 4096.0, r
->w
/ 4096.0, r
->h
/ 4096.0);
239 objstream(cstr
, buf
);
/*
 * All text fragments go inside a single BT ... ET pair; each one
 * selects its font and size, sets the text matrix to its position,
 * and shows its string with Tj.
 */
242 objstream(cstr
, "BT\n");
243 for (frag
= page
->first_text
; frag
; frag
= frag
->next
) {
246 objstream(cstr
, "/");
247 objstream(cstr
, frag
->fe
->name
);
248 sprintf(buf
, " %d Tf 1 0 0 1 %g %g Tm (", frag
->fontsize
,
249 frag
->x
/4096.0, frag
->y
/4096.0);
250 objstream(cstr
, buf
);
/* Parentheses and backslashes in the text get a backslash prefix. */
252 for (c
= frag
->text
; *c
; c
++) {
253 if (*c
== '(' || *c
== ')' || *c
== '\\')
254 objstream(cstr
, "\\");
257 objstream(cstr
, buf
);
260 objstream(cstr
, ") Tj\n");
262 objstream(cstr
, "ET");
265 * Also, we want an annotation dictionary containing the
266 * cross-references from this page.
268 if (page
->first_xref
) {
270 objtext(opage
, "/Annots [\n");
/*
 * One /Link annotation object per cross-reference: its rectangle,
 * then either a /Dest pointing at a page object or a /URI action.
 */
272 for (xr
= page
->first_xref
; xr
; xr
= xr
->next
) {
276 annot
= new_object(&olist
);
277 objref(opage
, annot
);
278 objtext(opage
, "\n");
280 objtext(annot
, "<<\n/Type /Annot\n/Subtype /Link\n/Rect [");
281 sprintf(buf
, "%g %g %g %g",
282 xr
->lx
/ 4096.0, xr
->by
/ 4096.0,
283 xr
->rx
/ 4096.0, xr
->ty
/ 4096.0);
285 objtext(annot
, "]\n/Border [0 0 0]\n");
287 if (xr
->dest
.type
== PAGE
) {
288 objtext(annot
, "/Dest [");
289 objref(annot
, (object
*)xr
->dest
.page
->spare
);
290 objtext(annot
, " /XYZ null null null]\n");
294 objtext(annot
, "/A <<\n/Type /Action\n/S /URI\n/URI (");
/* Same escaping as for page text: prefix ( ) and \ with a backslash. */
295 for (p
= xr
->dest
.url
; *p
; p
++) {
299 if (*p
== '(' || *p
== ')' || *p
== '\\')
300 objtext(annot
, "\\");
303 objtext(annot
, ")\n>>\n");
306 objtext(annot
, ">>\n");
309 objtext(opage
, "]\n");
312 objtext(opage
, ">>\n");
316 * Assemble the final linear form of every object.
/*
 * Each object becomes "<number> 0 obj", its main text, an optional
 * stream section, then "endobj". An object with stream text but no
 * main text is given a dictionary holding only /Length.
 */
318 for (o
= olist
.head
; o
; o
= o
->next
) {
319 rdstringc rs
= {0, 0, NULL
};
322 sprintf(text
, "%d 0 obj\n", o
->number
);
325 if (!o
->main
.text
&& o
->stream
.text
) {
326 sprintf(text
, "<<\n/Length %d\n>>\n", o
->stream
.pos
);
327 rdaddsc(&o
->main
, text
);
330 assert(o
->main
.text
);
331 rdaddsc(&rs
, o
->main
.text
);
334 if (rs
.text
[rs
.pos
-1] != '\n')
337 if (o
->stream
.text
) {
339 * FIXME: If we ever start compressing stream data then
340 * it will have zero bytes in it, so we'll have to be
341 * more careful than this.
343 rdaddsc(&rs
, "stream\n");
344 rdaddsc(&rs
, o
->stream
.text
);
345 rdaddsc(&rs
, "\nendstream\n");
346 sfree(o
->stream
.text
);
349 rdaddsc(&rs
, "endobj\n");
356 * Write out the PDF file.
/*
 * The file is opened in binary mode. The condition guarding the
 * err_cantopenw report is not visible in this extract.
 */
359 fp
= fopen(filename
, "wb");
361 error(err_cantopenw
, filename
);
/* fileoff starts as the byte count of the header line. */
368 fileoff
= fprintf(fp
, "%%PDF-1.3\n");
/*
 * Record each object's file offset, then write its final bytes.
 * NOTE(review): the fwrite result is not checked on the visible line.
 */
373 for (o
= olist
.head
; o
; o
= o
->next
) {
374 o
->fileoff
= fileoff
;
375 fwrite(o
->final
, 1, o
->size
, fp
);
380 * Cross-reference table
/*
 * The table covers entries 0 .. tail->number: one free entry for
 * object 0, then one 20-byte "in use" entry per object, in list
 * order. The first object is asserted to be number 1.
 */
382 fprintf(fp
, "xref\n");
383 assert(olist
.head
->number
== 1);
384 fprintf(fp
, "0 %d\n", olist
.tail
->number
+ 1);
385 fprintf(fp
, "0000000000 65535 f \n");
386 for (o
= olist
.head
; o
; o
= o
->next
) {
388 sprintf(entry
, "%010d 00000 n \n", o
->fileoff
);
389 assert(strlen(entry
) == 20);
/* Trailer: object count, root (catalogue) reference, xref offset. */
396 fprintf(fp
, "trailer\n<<\n/Size %d\n/Root %d 0 R\n>>\n",
397 olist
.tail
->number
+ 1, cat
->number
);
398 fprintf(fp
, "startxref\n%d\n%%%%EOF\n", fileoff
);
/*
 * new_object: allocate a fresh object belonging to `list'.
 *
 * The object's main and stream text buffers start out empty (NULL text,
 * zero pos and size). Its number is taken from list->number, which is
 * then incremented.
 *
 * NOTE(review): only the "list already has a tail" linking statement is
 * visible in this extract; the condition around it, the empty-list
 * case, the tail update and the return statement are not shown.
 * Presumably the new object is appended to the list and returned.
 */
405 static object
*new_object(objlist
*list
)
407 object
*obj
= mknew(object
);
411 obj
->main
.text
= NULL
;
412 obj
->main
.pos
= obj
->main
.size
= 0;
413 obj
->stream
.text
= NULL
;
414 obj
->stream
.pos
= obj
->stream
.size
= 0;
/* Hand out the next object number from the list's counter. */
416 obj
->number
= list
->number
++;
420 list
->tail
->next
= obj
;
431 static void objtext(object
*o
, char const *text
)
433 rdaddsc(&o
->main
, text
);
436 static void objstream(object
*o
, char const *text
)
438 rdaddsc(&o
->stream
, text
);
441 static void objref(object
*o
, object
*dest
)
444 sprintf(buf
, "%d 0 R", dest
->number
);
445 rdaddsc(&o
->main
, buf
);
/*
 * make_pages_node: write the /Pages dictionary for `node', covering the
 * pages starting at `first'.
 *
 * Parameters:
 *   node      - the object that receives the /Pages dictionary.
 *   parent    - the enclosing /Pages node, referenced via /Parent.
 *               The top-level call passes NULL.
 *   first     - first page of the run.
 *   last      - end of the run; the top-level call passes NULL.
 *   resources - resources object referenced via /Resources.
 *               Recursive calls pass NULL.
 *
 * If the run holds more than TREE_BRANCH pages it is divided into
 * TREE_BRANCH sub-runs. A sub-run of one page is referenced directly,
 * and that page object gets its /Parent entry here. Any other sub-run
 * gets a new intermediate node, built by recursion. If the run is small
 * enough, every page is referenced directly and given its /Parent.
 *
 * NOTE(review): the conditions guarding the /Parent and /Resources
 * entries, the loop bodies that count pages and advance
 * thisfirst/thislast, and the loop exit tests involving `last' are not
 * visible in this extract. Presumably /Parent and /Resources are
 * written only when the corresponding argument is non-NULL -- confirm
 * against the full source.
 */
448 static void make_pages_node(object
*node
, object
*parent
, page_data
*first
,
449 page_data
*last
, object
*resources
)
455 objtext(node
, "<<\n/Type /Pages\n");
457 objtext(node
, "/Parent ");
458 objref(node
, parent
);
463 * Count the pages in this stretch, to see if there are few
464 * enough to reference directly.
467 for (page
= first
; page
; page
= page
->next
) {
473 sprintf(buf
, "/Count %d\n/Kids [\n", count
);
476 if (count
> TREE_BRANCH
) {
478 page_data
*thisfirst
, *thislast
;
/*
 * Sub-run i gets (i+1)*count/TREE_BRANCH - i*count/TREE_BRANCH pages,
 * so the integer-division remainders are spread across the sub-runs
 * and the sizes sum to `count'.
 */
482 for (i
= 0; i
< TREE_BRANCH
; i
++) {
483 int number
= (i
+1) * count
/ TREE_BRANCH
- i
* count
/ TREE_BRANCH
;
/* A single-page sub-run needs no intermediate node. */
490 if (thisfirst
== thislast
) {
491 objref(node
, (object
*)thisfirst
->spare
);
492 objtext((object
*)thisfirst
->spare
, "/Parent ");
493 objref((object
*)thisfirst
->spare
, node
);
494 objtext((object
*)thisfirst
->spare
, "\n");
/* Otherwise create a child /Pages node on the same object list. */
496 object
*newnode
= new_object(node
->list
);
497 make_pages_node(newnode
, node
, thisfirst
, thislast
, NULL
);
498 objref(node
, newnode
);
503 assert(thislast
== last
|| page
== NULL
);
/* Small run: list every page as a direct kid and set its /Parent. */
506 for (page
= first
; page
; page
= page
->next
) {
507 objref(node
, (object
*)page
->spare
);
509 objtext((object
*)page
->spare
, "/Parent ");
510 objref((object
*)page
->spare
, node
);
511 objtext((object
*)page
->spare
, "\n");
517 objtext(node
, "]\n");
520 objtext(node
, "/Resources ");
521 objref(node
, resources
);
525 objtext(node
, ">>\n");