Add kerning support to paper backends, embedding the kerning tables from
[sgt/halibut] / bk_man.c
CommitLineData
7136a6c7 1/*
2 * man page backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/*
 * Per-run configuration of the man page backend, filled in by
 * man_configure() and released by man_conf_cleanup().
 */
typedef struct {
    /* `man-identity' words as consecutive NUL-terminated strings
     * ending in an empty string; NULL if never configured. Owned. */
    wchar_t *th;
    /* Boolean from `man-headnumbers': prefix headings with kwtext. */
    int headnumbers;
    /* From `man-mindepth': headings shallower than this are omitted. */
    int mindepth;
    /* Output file name. Owned (dynamically allocated). */
    char *filename;
    /* Output character set identifier. */
    int charset;
    /* Bullet text; points into config data or a literal, not owned. */
    wchar_t *bullet;
    /* Opening quote text; not owned. */
    wchar_t *lquote;
    /* Closing quote text; not owned. */
    wchar_t *rquote;
} manconfig;
18
b0ea3acd 19static void man_text(FILE *, word *,
20 int newline, int quote_props, manconfig *conf);
21static void man_codepara(FILE *, word *, int charset);
22static int man_convert(wchar_t const *s, int maxlen,
23 char **result, int quote_props,
24 int charset, charset_state *state);
25
4b3c5afb 26static manconfig man_configure(paragraph *source) {
b0ea3acd 27 paragraph *p;
4b3c5afb 28 manconfig ret;
29
30 /*
31 * Defaults.
32 */
33 ret.th = NULL;
34 ret.headnumbers = FALSE;
35 ret.mindepth = 0;
50d6b4bd 36 ret.filename = dupstr("output.1");
93688997 37 ret.charset = CS_ASCII;
b0ea3acd 38 ret.bullet = L"\x2022\0o\0\0";
39 ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
40 ret.rquote = uadv(ret.lquote);
4b3c5afb 41
b0ea3acd 42 /*
43 * Two-pass configuration so that we can pick up global config
44 * (e.g. `quotes') before having it overridden by specific
c5546514 45 * config (`man-quotes'), irrespective of the order in which
b0ea3acd 46 * they occur.
47 */
48 for (p = source; p; p = p->next) {
49 if (p->type == para_Config) {
50 if (!ustricmp(p->keyword, L"quotes")) {
51 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
52 ret.lquote = uadv(p->keyword);
53 ret.rquote = uadv(ret.lquote);
54 }
55 }
56 }
57 }
58
59 for (p = source; p; p = p->next) {
60 if (p->type == para_Config) {
61 if (!ustricmp(p->keyword, L"man-identity")) {
4b3c5afb 62 wchar_t *wp, *ep;
63
b0ea3acd 64 wp = uadv(p->keyword);
4b3c5afb 65 ep = wp;
66 while (*ep)
67 ep = uadv(ep);
50d6b4bd 68 sfree(ret.th);
f1530049 69 ret.th = snewn(ep - wp + 1, wchar_t);
4b3c5afb 70 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
b0ea3acd 71 } else if (!ustricmp(p->keyword, L"man-charset")) {
0960a3d8 72 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
b0ea3acd 73 } else if (!ustricmp(p->keyword, L"man-headnumbers")) {
74 ret.headnumbers = utob(uadv(p->keyword));
75 } else if (!ustricmp(p->keyword, L"man-mindepth")) {
76 ret.mindepth = utoi(uadv(p->keyword));
77 } else if (!ustricmp(p->keyword, L"man-filename")) {
50d6b4bd 78 sfree(ret.filename);
b0ea3acd 79 ret.filename = dupstr(adv(p->origkeyword));
80 } else if (!ustricmp(p->keyword, L"man-bullet")) {
81 ret.bullet = uadv(p->keyword);
c5546514 82 } else if (!ustricmp(p->keyword, L"man-quotes")) {
b0ea3acd 83 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
84 ret.lquote = uadv(p->keyword);
85 ret.rquote = uadv(ret.lquote);
86 }
4b3c5afb 87 }
88 }
89 }
90
b0ea3acd 91 /*
92 * Now process fallbacks on quote characters and bullets.
93 */
94 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
95 (!cvt_ok(ret.charset, ret.lquote) ||
96 !cvt_ok(ret.charset, ret.rquote))) {
97 ret.lquote = uadv(ret.rquote);
98 ret.rquote = uadv(ret.lquote);
99 }
100
101 while (*ret.bullet && *uadv(ret.bullet) &&
102 !cvt_ok(ret.charset, ret.bullet))
103 ret.bullet = uadv(ret.bullet);
104
4b3c5afb 105 return ret;
106}
107
108static void man_conf_cleanup(manconfig cf)
109{
110 sfree(cf.th);
50d6b4bd 111 sfree(cf.filename);
4b3c5afb 112}
7136a6c7 113
ba9c1487 114paragraph *man_config_filename(char *filename)
115{
e4ea58f8 116 return cmdline_cfg_simple("man-filename", filename, NULL);
ba9c1487 117}
118
/*
 * Bit flags for the `quote_props' argument of the conversion
 * routines.
 */
/* quote initial . and ' on a line */
#define QUOTE_INITCTRL 1
/* quote double quotes by doubling them */
#define QUOTE_QUOTES 2
121
122void man_backend(paragraph *sourceform, keywordlist *keywords,
43341922 123 indexdata *idx, void *unused) {
7136a6c7 124 paragraph *p;
125 FILE *fp;
4b3c5afb 126 manconfig conf;
02478c4f 127 int had_described_thing;
7136a6c7 128
43341922 129 IGNORE(unused);
130 IGNORE(keywords);
131 IGNORE(idx);
7136a6c7 132
4b3c5afb 133 conf = man_configure(sourceform);
134
7136a6c7 135 /*
50d6b4bd 136 * Open the output file.
7136a6c7 137 */
50d6b4bd 138 fp = fopen(conf.filename, "w");
7136a6c7 139 if (!fp) {
50d6b4bd 140 error(err_cantopenw, conf.filename);
7136a6c7 141 return;
142 }
143
144 /* Do the version ID */
145 for (p = sourceform; p; p = p->next)
146 if (p->type == para_VersionID) {
147 fprintf(fp, ".\\\" ");
b0ea3acd 148 man_text(fp, p->words, TRUE, 0, &conf);
7136a6c7 149 }
150
4b3c5afb 151 /* .TH name-of-program manual-section */
22905f72 152 fprintf(fp, ".TH");
153 if (conf.th && *conf.th) {
4b3c5afb 154 char *c;
22905f72 155 wchar_t *wp;
156
157 for (wp = conf.th; *wp; wp = uadv(wp)) {
158 fputs(" \"", fp);
93688997 159 man_convert(wp, 0, &c, QUOTE_QUOTES, conf.charset, NULL);
22905f72 160 fputs(c, fp);
161 sfree(c);
162 fputc('"', fp);
4b3c5afb 163 }
164 }
22905f72 165 fputc('\n', fp);
7136a6c7 166
167 fprintf(fp, ".UC\n");
168
02478c4f 169 had_described_thing = FALSE;
170#define cleanup_described_thing do { \
171 if (had_described_thing) \
172 fprintf(fp, "\n"); \
173 had_described_thing = FALSE; \
174} while (0)
175
7136a6c7 176 for (p = sourceform; p; p = p->next) switch (p->type) {
177 /*
178 * Things we ignore because we've already processed them or
179 * aren't going to touch them in this pass.
180 */
181 case para_IM:
182 case para_BR:
183 case para_Biblio: /* only touch BiblioCited */
184 case para_VersionID:
7136a6c7 185 case para_NoCite:
186 case para_Title:
187 break;
188
189 /*
190 * Headings.
191 */
192 case para_Chapter:
193 case para_Appendix:
194 case para_UnnumberedChapter:
195 case para_Heading:
196 case para_Subsect:
8902e0ed 197
02478c4f 198 cleanup_described_thing;
4b3c5afb 199 {
200 int depth;
201 if (p->type == para_Subsect)
202 depth = p->aux + 2;
203 else if (p->type == para_Heading)
204 depth = 1;
205 else
206 depth = 0;
207 if (depth >= conf.mindepth) {
208 fprintf(fp, ".SH \"");
209 if (conf.headnumbers && p->kwtext) {
b0ea3acd 210 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
4b3c5afb 211 fprintf(fp, " ");
212 }
b0ea3acd 213 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
4b3c5afb 214 fprintf(fp, "\"\n");
215 }
216 break;
217 }
7136a6c7 218
219 /*
220 * Code paragraphs.
221 */
222 case para_Code:
02478c4f 223 cleanup_described_thing;
7136a6c7 224 fprintf(fp, ".PP\n");
93688997 225 man_codepara(fp, p->words, conf.charset);
7136a6c7 226 break;
227
228 /*
229 * Normal paragraphs.
230 */
231 case para_Normal:
9057a0a8 232 case para_Copyright:
02478c4f 233 cleanup_described_thing;
7136a6c7 234 fprintf(fp, ".PP\n");
b0ea3acd 235 man_text(fp, p->words, TRUE, 0, &conf);
7136a6c7 236 break;
237
238 /*
239 * List paragraphs.
240 */
241 case para_Description:
242 case para_BiblioCited:
243 case para_Bullet:
244 case para_NumberedList:
02478c4f 245 if (p->type != para_Description)
246 cleanup_described_thing;
247
7136a6c7 248 if (p->type == para_Bullet) {
b0ea3acd 249 char *bullettext;
250 man_convert(conf.bullet, -1, &bullettext, QUOTE_QUOTES,
251 conf.charset, NULL);
252 fprintf(fp, ".IP \"\\fB%s\\fP\"\n", bullettext);
253 sfree(bullettext);
7136a6c7 254 } else if (p->type == para_NumberedList) {
255 fprintf(fp, ".IP \"");
b0ea3acd 256 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
7136a6c7 257 fprintf(fp, "\"\n");
258 } else if (p->type == para_Description) {
02478c4f 259 if (had_described_thing) {
260 /*
261 * Do nothing; the .xP for this paragraph is the
262 * .IP which has come before it in the
263 * DescribedThing.
264 */
265 } else {
266 /*
267 * A \dd without a preceding \dt is given a blank
268 * one.
269 */
270 fprintf(fp, ".IP \"\"\n");
271 }
7136a6c7 272 } else if (p->type == para_BiblioCited) {
273 fprintf(fp, ".IP \"");
b0ea3acd 274 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
7136a6c7 275 fprintf(fp, "\"\n");
276 }
b0ea3acd 277 man_text(fp, p->words, TRUE, 0, &conf);
02478c4f 278 had_described_thing = FALSE;
7136a6c7 279 break;
280
281 case para_DescribedThing:
02478c4f 282 cleanup_described_thing;
7136a6c7 283 fprintf(fp, ".IP \"");
b0ea3acd 284 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
7136a6c7 285 fprintf(fp, "\"\n");
02478c4f 286 had_described_thing = TRUE;
7136a6c7 287 break;
288
289 case para_Rule:
290 /*
4b3c5afb 291 * This isn't terribly good. Anyone who wants to do better
292 * should feel free!
7136a6c7 293 */
02478c4f 294 cleanup_described_thing;
4b3c5afb 295 fprintf(fp, ".PP\n----------------------------------------\n");
7136a6c7 296 break;
297
298 case para_LcontPush:
2614b01d 299 case para_QuotePush:
02478c4f 300 cleanup_described_thing;
7136a6c7 301 fprintf(fp, ".RS\n");
302 break;
303 case para_LcontPop:
2614b01d 304 case para_QuotePop:
02478c4f 305 cleanup_described_thing;
7136a6c7 306 fprintf(fp, ".RE\n");
307 break;
308 }
02478c4f 309 cleanup_described_thing;
7136a6c7 310
311 /*
312 * Tidy up.
313 */
314 fclose(fp);
4b3c5afb 315 man_conf_cleanup(conf);
7136a6c7 316}
317
318/*
93688997 319 * Convert a wide string into a string of chars; mallocs the
320 * resulting string and stores a pointer to it in `*result'.
321 *
322 * If `state' is non-NULL, updates the charset state pointed to. If
323 * `state' is NULL, this function uses its own state, initialises
324 * it from scratch, and cleans it up when finished. If `state' is
325 * non-NULL but _s_ is NULL, cleans up a provided state.
7136a6c7 326 *
327 * Return is nonzero if all characters are OK. If not all
328 * characters are OK but `result' is non-NULL, a result _will_
329 * still be generated!
330 *
93688997 331 * This function also does escaping of groff special characters.
7136a6c7 332 */
93688997 333static int man_convert(wchar_t const *s, int maxlen,
334 char **result, int quote_props,
335 int charset, charset_state *state) {
336 charset_state internal_state = CHARSET_INIT_STATE;
337 int slen, err;
338 char *p = NULL, *q;
7136a6c7 339 int plen = 0, psize = 0;
93688997 340 rdstringc out = {0, 0, NULL};
7136a6c7 341
93688997 342 if (!state)
343 state = &internal_state;
344
345 slen = (s ? ustrlen(s) : 0);
346
347 if (slen > maxlen && maxlen > 0)
348 slen = maxlen;
349
350 psize = 384;
351 plen = 0;
f1530049 352 p = snewn(psize, char);
93688997 353 err = 0;
354
355 while (slen > 0) {
356 int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen,
357 charset, state, (err ? NULL : &err));
358 if (ret > 0) {
359 plen += ret;
360 if (psize - plen < 256) {
7136a6c7 361 psize = plen + 256;
f1530049 362 p = sresize(p, psize, char);
7136a6c7 363 }
7136a6c7 364 }
365 }
93688997 366
367 if (state == &internal_state || s == NULL) {
368 int ret = charset_from_unicode(NULL, 0, p+plen, psize-plen,
369 charset, state, NULL);
370 if (ret > 0)
371 plen += ret;
7136a6c7 372 }
93688997 373
374 for (q = p; q < p+plen; q++) {
375 if (q == p && (*q == '.' || *q == '\'') &&
376 (quote_props & QUOTE_INITCTRL)) {
377 /*
378 * Control character (. or ') at the start of a
379 * line. Quote it by putting \& (troff zero-width
380 * space) before it.
381 */
382 rdaddc(&out, '\\');
383 rdaddc(&out, '&');
384 } else if (*q == '\\') {
385 /*
386 * Quote backslashes by doubling them, always.
387 */
388 rdaddc(&out, '\\');
389 } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) {
390 /*
391 * Double quote within double quotes. Quote it by
392 * doubling.
393 */
394 rdaddc(&out, '"');
395 }
396 rdaddc(&out, *q);
397 }
398
399 sfree(p);
400
401 if (out.text)
402 *result = rdtrimc(&out);
403 else
404 *result = dupstr("");
405
406 return !err;
7136a6c7 407}
408
409static void man_rdaddwc(rdstringc *rs, word *text, word *end,
b0ea3acd 410 int quote_props, manconfig *conf,
411 charset_state *state) {
7136a6c7 412 char *c;
413
414 for (; text && text != end; text = text->next) switch (text->type) {
415 case word_HyperLink:
416 case word_HyperEnd:
417 case word_UpperXref:
418 case word_LowerXref:
419 case word_XrefEnd:
420 case word_IndexRef:
421 break;
422
423 case word_Normal:
424 case word_Emph:
425 case word_Code:
426 case word_WeakCode:
427 case word_WhiteSpace:
428 case word_EmphSpace:
429 case word_CodeSpace:
430 case word_WkCodeSpace:
431 case word_Quote:
432 case word_EmphQuote:
433 case word_CodeQuote:
434 case word_WkCodeQuote:
435 assert(text->type != word_CodeQuote &&
436 text->type != word_WkCodeQuote);
93688997 437
7136a6c7 438 if (towordstyle(text->type) == word_Emph &&
439 (attraux(text->aux) == attr_First ||
93688997 440 attraux(text->aux) == attr_Only)) {
441 if (rs->pos > 0)
442 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 443 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 444 rdaddsc(rs, c);
445 sfree(c);
446 *state = charset_init_state;
7136a6c7 447 rdaddsc(rs, "\\fI");
93688997 448 } else if ((towordstyle(text->type) == word_Code ||
449 towordstyle(text->type) == word_WeakCode) &&
450 (attraux(text->aux) == attr_First ||
451 attraux(text->aux) == attr_Only)) {
452 if (rs->pos > 0)
453 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 454 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 455 rdaddsc(rs, c);
456 sfree(c);
457 *state = charset_init_state;
7136a6c7 458 rdaddsc(rs, "\\fB");
93688997 459 }
460
7136a6c7 461 if (removeattr(text->type) == word_Normal) {
93688997 462 charset_state s2 = *state;
463
7136a6c7 464 if (rs->pos > 0)
465 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 466 if (man_convert(text->text, 0, &c, quote_props, conf->charset, &s2) ||
93688997 467 !text->alt) {
7136a6c7 468 rdaddsc(rs, c);
93688997 469 *state = s2;
470 } else {
b0ea3acd 471 man_rdaddwc(rs, text->alt, NULL, quote_props, conf, state);
93688997 472 }
7136a6c7 473 sfree(c);
474 } else if (removeattr(text->type) == word_WhiteSpace) {
93688997 475 if (rs->pos > 0)
476 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 477 man_convert(L" ", 1, &c, quote_props, conf->charset, state);
93688997 478 rdaddsc(rs, c);
479 sfree(c);
7136a6c7 480 } else if (removeattr(text->type) == word_Quote) {
93688997 481 if (rs->pos > 0)
482 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 483 man_convert(quoteaux(text->aux) == quote_Open ?
484 conf->lquote : conf->rquote, 0,
485 &c, quote_props, conf->charset, state);
93688997 486 rdaddsc(rs, c);
487 sfree(c);
7136a6c7 488 }
93688997 489 if (towordstyle(text->type) != word_Normal &&
7136a6c7 490 (attraux(text->aux) == attr_Last ||
93688997 491 attraux(text->aux) == attr_Only)) {
492 if (rs->pos > 0)
493 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 494 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 495 rdaddsc(rs, c);
496 sfree(c);
497 *state = charset_init_state;
7136a6c7 498 rdaddsc(rs, "\\fP");
93688997 499 }
7136a6c7 500 break;
501 }
b0ea3acd 502 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 503 rdaddsc(rs, c);
504 sfree(c);
7136a6c7 505}
506
93688997 507static void man_text(FILE *fp, word *text, int newline,
b0ea3acd 508 int quote_props, manconfig *conf) {
7136a6c7 509 rdstringc t = { 0, 0, NULL };
93688997 510 charset_state state = CHARSET_INIT_STATE;
7136a6c7 511
b0ea3acd 512 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL, conf, &state);
7136a6c7 513 fprintf(fp, "%s", t.text);
514 sfree(t.text);
515 if (newline)
516 fputc('\n', fp);
517}
518
93688997 519static void man_codepara(FILE *fp, word *text, int charset) {
7136a6c7 520 fprintf(fp, ".nf\n");
521 for (; text; text = text->next) if (text->type == word_WeakCode) {
522 char *c;
4b3c5afb 523 wchar_t *t, *e;
524 int quote_props = QUOTE_INITCTRL;
525
526 t = text->text;
527 if (text->next && text->next->type == word_Emph) {
528 e = text->next->text;
529 text = text->next;
530 } else
531 e = NULL;
532
533 while (e && *e && *t) {
534 int n;
535 int ec = *e;
536
537 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
538 if (ec == 'i')
539 fprintf(fp, "\\fI");
540 else if (ec == 'b')
541 fprintf(fp, "\\fB");
93688997 542 man_convert(t, n, &c, quote_props, charset, NULL);
4b3c5afb 543 quote_props &= ~QUOTE_INITCTRL;
544 fprintf(fp, "%s", c);
545 sfree(c);
546 if (ec == 'i' || ec == 'b')
547 fprintf(fp, "\\fP");
548 t += n;
549 e += n;
550 }
93688997 551 man_convert(t, 0, &c, quote_props, charset, NULL);
7136a6c7 552 fprintf(fp, "%s\n", c);
553 sfree(c);
554 }
555 fprintf(fp, ".fi\n");
556}