Free up all the data we allocated during the HTML backend.
[sgt/halibut] / bk_man.c
CommitLineData
7136a6c7 1/*
2 * man page backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/*
 * Settings for one run of the man page backend, as collected by
 * man_configure() and released by man_conf_cleanup().
 */
typedef struct {
    /*
     * Arguments of the `man-identity' directive, copied into a
     * private allocation as a run of NUL-terminated strings that
     * ends with an empty string. NULL if the directive never
     * appeared. Owned by this structure.
     */
    wchar_t *th;
    /* Nonzero to prefix section headings with their keyword text. */
    int headnumbers;
    /* Headings whose depth is below this value are not written. */
    int mindepth;
    /* Name of the output file. Owned by this structure. */
    char *filename;
    /* Character set identifier handed to the conversion routines. */
    int charset;
    /*
     * Candidate bullet and quote strings. These point either at
     * string literals or into a config paragraph's keyword, so
     * they are NOT owned by this structure.
     */
    wchar_t *bullet;
    wchar_t *lquote;
    wchar_t *rquote;
} manconfig;
18
b0ea3acd 19static void man_text(FILE *, word *,
20 int newline, int quote_props, manconfig *conf);
21static void man_codepara(FILE *, word *, int charset);
22static int man_convert(wchar_t const *s, int maxlen,
23 char **result, int quote_props,
24 int charset, charset_state *state);
25
4b3c5afb 26static manconfig man_configure(paragraph *source) {
b0ea3acd 27 paragraph *p;
4b3c5afb 28 manconfig ret;
29
30 /*
31 * Defaults.
32 */
33 ret.th = NULL;
34 ret.headnumbers = FALSE;
35 ret.mindepth = 0;
50d6b4bd 36 ret.filename = dupstr("output.1");
93688997 37 ret.charset = CS_ASCII;
b0ea3acd 38 ret.bullet = L"\x2022\0o\0\0";
39 ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
40 ret.rquote = uadv(ret.lquote);
4b3c5afb 41
b0ea3acd 42 /*
43 * Two-pass configuration so that we can pick up global config
44 * (e.g. `quotes') before having it overridden by specific
c5546514 45 * config (`man-quotes'), irrespective of the order in which
b0ea3acd 46 * they occur.
47 */
48 for (p = source; p; p = p->next) {
49 if (p->type == para_Config) {
50 if (!ustricmp(p->keyword, L"quotes")) {
51 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
52 ret.lquote = uadv(p->keyword);
53 ret.rquote = uadv(ret.lquote);
54 }
55 }
56 }
57 }
58
59 for (p = source; p; p = p->next) {
60 if (p->type == para_Config) {
61 if (!ustricmp(p->keyword, L"man-identity")) {
4b3c5afb 62 wchar_t *wp, *ep;
63
b0ea3acd 64 wp = uadv(p->keyword);
4b3c5afb 65 ep = wp;
66 while (*ep)
67 ep = uadv(ep);
50d6b4bd 68 sfree(ret.th);
f1530049 69 ret.th = snewn(ep - wp + 1, wchar_t);
4b3c5afb 70 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
b0ea3acd 71 } else if (!ustricmp(p->keyword, L"man-charset")) {
72 char *csname = utoa_dup(uadv(p->keyword), CS_ASCII);
93688997 73 ret.charset = charset_from_localenc(csname);
74 sfree(csname);
b0ea3acd 75 } else if (!ustricmp(p->keyword, L"man-headnumbers")) {
76 ret.headnumbers = utob(uadv(p->keyword));
77 } else if (!ustricmp(p->keyword, L"man-mindepth")) {
78 ret.mindepth = utoi(uadv(p->keyword));
79 } else if (!ustricmp(p->keyword, L"man-filename")) {
50d6b4bd 80 sfree(ret.filename);
b0ea3acd 81 ret.filename = dupstr(adv(p->origkeyword));
82 } else if (!ustricmp(p->keyword, L"man-bullet")) {
83 ret.bullet = uadv(p->keyword);
c5546514 84 } else if (!ustricmp(p->keyword, L"man-quotes")) {
b0ea3acd 85 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
86 ret.lquote = uadv(p->keyword);
87 ret.rquote = uadv(ret.lquote);
88 }
4b3c5afb 89 }
90 }
91 }
92
b0ea3acd 93 /*
94 * Now process fallbacks on quote characters and bullets.
95 */
96 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
97 (!cvt_ok(ret.charset, ret.lquote) ||
98 !cvt_ok(ret.charset, ret.rquote))) {
99 ret.lquote = uadv(ret.rquote);
100 ret.rquote = uadv(ret.lquote);
101 }
102
103 while (*ret.bullet && *uadv(ret.bullet) &&
104 !cvt_ok(ret.charset, ret.bullet))
105 ret.bullet = uadv(ret.bullet);
106
4b3c5afb 107 return ret;
108}
109
110static void man_conf_cleanup(manconfig cf)
111{
112 sfree(cf.th);
50d6b4bd 113 sfree(cf.filename);
4b3c5afb 114}
7136a6c7 115
ba9c1487 116paragraph *man_config_filename(char *filename)
117{
e4ea58f8 118 return cmdline_cfg_simple("man-filename", filename, NULL);
ba9c1487 119}
120
/*
 * Bit flags for the `quote_props' argument of the conversion
 * routines in this file.
 */
#define QUOTE_INITCTRL 1   /* protect a leading . or ' on a line */
#define QUOTE_QUOTES   2   /* double any " inside a quoted argument */
123
124void man_backend(paragraph *sourceform, keywordlist *keywords,
43341922 125 indexdata *idx, void *unused) {
7136a6c7 126 paragraph *p;
127 FILE *fp;
4b3c5afb 128 manconfig conf;
7136a6c7 129
43341922 130 IGNORE(unused);
131 IGNORE(keywords);
132 IGNORE(idx);
7136a6c7 133
4b3c5afb 134 conf = man_configure(sourceform);
135
7136a6c7 136 /*
50d6b4bd 137 * Open the output file.
7136a6c7 138 */
50d6b4bd 139 fp = fopen(conf.filename, "w");
7136a6c7 140 if (!fp) {
50d6b4bd 141 error(err_cantopenw, conf.filename);
7136a6c7 142 return;
143 }
144
145 /* Do the version ID */
146 for (p = sourceform; p; p = p->next)
147 if (p->type == para_VersionID) {
148 fprintf(fp, ".\\\" ");
b0ea3acd 149 man_text(fp, p->words, TRUE, 0, &conf);
7136a6c7 150 }
151
4b3c5afb 152 /* .TH name-of-program manual-section */
22905f72 153 fprintf(fp, ".TH");
154 if (conf.th && *conf.th) {
4b3c5afb 155 char *c;
22905f72 156 wchar_t *wp;
157
158 for (wp = conf.th; *wp; wp = uadv(wp)) {
159 fputs(" \"", fp);
93688997 160 man_convert(wp, 0, &c, QUOTE_QUOTES, conf.charset, NULL);
22905f72 161 fputs(c, fp);
162 sfree(c);
163 fputc('"', fp);
4b3c5afb 164 }
165 }
22905f72 166 fputc('\n', fp);
7136a6c7 167
168 fprintf(fp, ".UC\n");
169
7136a6c7 170 for (p = sourceform; p; p = p->next) switch (p->type) {
171 /*
172 * Things we ignore because we've already processed them or
173 * aren't going to touch them in this pass.
174 */
175 case para_IM:
176 case para_BR:
177 case para_Biblio: /* only touch BiblioCited */
178 case para_VersionID:
7136a6c7 179 case para_NoCite:
180 case para_Title:
181 break;
182
183 /*
184 * Headings.
185 */
186 case para_Chapter:
187 case para_Appendix:
188 case para_UnnumberedChapter:
189 case para_Heading:
190 case para_Subsect:
8902e0ed 191
4b3c5afb 192 {
193 int depth;
194 if (p->type == para_Subsect)
195 depth = p->aux + 2;
196 else if (p->type == para_Heading)
197 depth = 1;
198 else
199 depth = 0;
200 if (depth >= conf.mindepth) {
201 fprintf(fp, ".SH \"");
202 if (conf.headnumbers && p->kwtext) {
b0ea3acd 203 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
4b3c5afb 204 fprintf(fp, " ");
205 }
b0ea3acd 206 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
4b3c5afb 207 fprintf(fp, "\"\n");
208 }
209 break;
210 }
7136a6c7 211
212 /*
213 * Code paragraphs.
214 */
215 case para_Code:
216 fprintf(fp, ".PP\n");
93688997 217 man_codepara(fp, p->words, conf.charset);
7136a6c7 218 break;
219
220 /*
221 * Normal paragraphs.
222 */
223 case para_Normal:
9057a0a8 224 case para_Copyright:
7136a6c7 225 fprintf(fp, ".PP\n");
b0ea3acd 226 man_text(fp, p->words, TRUE, 0, &conf);
7136a6c7 227 break;
228
229 /*
230 * List paragraphs.
231 */
232 case para_Description:
233 case para_BiblioCited:
234 case para_Bullet:
235 case para_NumberedList:
236 if (p->type == para_Bullet) {
b0ea3acd 237 char *bullettext;
238 man_convert(conf.bullet, -1, &bullettext, QUOTE_QUOTES,
239 conf.charset, NULL);
240 fprintf(fp, ".IP \"\\fB%s\\fP\"\n", bullettext);
241 sfree(bullettext);
7136a6c7 242 } else if (p->type == para_NumberedList) {
243 fprintf(fp, ".IP \"");
b0ea3acd 244 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
7136a6c7 245 fprintf(fp, "\"\n");
246 } else if (p->type == para_Description) {
247 /*
248 * Do nothing; the .xP for this paragraph is the .IP
249 * which has come before it in the DescribedThing.
250 */
251 } else if (p->type == para_BiblioCited) {
252 fprintf(fp, ".IP \"");
b0ea3acd 253 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
7136a6c7 254 fprintf(fp, "\"\n");
255 }
b0ea3acd 256 man_text(fp, p->words, TRUE, 0, &conf);
7136a6c7 257 break;
258
259 case para_DescribedThing:
260 fprintf(fp, ".IP \"");
b0ea3acd 261 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
7136a6c7 262 fprintf(fp, "\"\n");
263 break;
264
265 case para_Rule:
266 /*
4b3c5afb 267 * This isn't terribly good. Anyone who wants to do better
268 * should feel free!
7136a6c7 269 */
4b3c5afb 270 fprintf(fp, ".PP\n----------------------------------------\n");
7136a6c7 271 break;
272
273 case para_LcontPush:
2614b01d 274 case para_QuotePush:
7136a6c7 275 fprintf(fp, ".RS\n");
276 break;
277 case para_LcontPop:
2614b01d 278 case para_QuotePop:
7136a6c7 279 fprintf(fp, ".RE\n");
280 break;
281 }
282
283 /*
284 * Tidy up.
285 */
286 fclose(fp);
4b3c5afb 287 man_conf_cleanup(conf);
7136a6c7 288}
289
290/*
93688997 291 * Convert a wide string into a string of chars; mallocs the
292 * resulting string and stores a pointer to it in `*result'.
293 *
294 * If `state' is non-NULL, updates the charset state pointed to. If
295 * `state' is NULL, this function uses its own state, initialises
296 * it from scratch, and cleans it up when finished. If `state' is
297 * non-NULL but _s_ is NULL, cleans up a provided state.
7136a6c7 298 *
299 * Return is nonzero if all characters are OK. If not all
300 * characters are OK but `result' is non-NULL, a result _will_
301 * still be generated!
302 *
93688997 303 * This function also does escaping of groff special characters.
7136a6c7 304 */
93688997 305static int man_convert(wchar_t const *s, int maxlen,
306 char **result, int quote_props,
307 int charset, charset_state *state) {
308 charset_state internal_state = CHARSET_INIT_STATE;
309 int slen, err;
310 char *p = NULL, *q;
7136a6c7 311 int plen = 0, psize = 0;
93688997 312 rdstringc out = {0, 0, NULL};
7136a6c7 313
93688997 314 if (!state)
315 state = &internal_state;
316
317 slen = (s ? ustrlen(s) : 0);
318
319 if (slen > maxlen && maxlen > 0)
320 slen = maxlen;
321
322 psize = 384;
323 plen = 0;
f1530049 324 p = snewn(psize, char);
93688997 325 err = 0;
326
327 while (slen > 0) {
328 int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen,
329 charset, state, (err ? NULL : &err));
330 if (ret > 0) {
331 plen += ret;
332 if (psize - plen < 256) {
7136a6c7 333 psize = plen + 256;
f1530049 334 p = sresize(p, psize, char);
7136a6c7 335 }
7136a6c7 336 }
337 }
93688997 338
339 if (state == &internal_state || s == NULL) {
340 int ret = charset_from_unicode(NULL, 0, p+plen, psize-plen,
341 charset, state, NULL);
342 if (ret > 0)
343 plen += ret;
7136a6c7 344 }
93688997 345
346 for (q = p; q < p+plen; q++) {
347 if (q == p && (*q == '.' || *q == '\'') &&
348 (quote_props & QUOTE_INITCTRL)) {
349 /*
350 * Control character (. or ') at the start of a
351 * line. Quote it by putting \& (troff zero-width
352 * space) before it.
353 */
354 rdaddc(&out, '\\');
355 rdaddc(&out, '&');
356 } else if (*q == '\\') {
357 /*
358 * Quote backslashes by doubling them, always.
359 */
360 rdaddc(&out, '\\');
361 } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) {
362 /*
363 * Double quote within double quotes. Quote it by
364 * doubling.
365 */
366 rdaddc(&out, '"');
367 }
368 rdaddc(&out, *q);
369 }
370
371 sfree(p);
372
373 if (out.text)
374 *result = rdtrimc(&out);
375 else
376 *result = dupstr("");
377
378 return !err;
7136a6c7 379}
380
381static void man_rdaddwc(rdstringc *rs, word *text, word *end,
b0ea3acd 382 int quote_props, manconfig *conf,
383 charset_state *state) {
7136a6c7 384 char *c;
385
386 for (; text && text != end; text = text->next) switch (text->type) {
387 case word_HyperLink:
388 case word_HyperEnd:
389 case word_UpperXref:
390 case word_LowerXref:
391 case word_XrefEnd:
392 case word_IndexRef:
393 break;
394
395 case word_Normal:
396 case word_Emph:
397 case word_Code:
398 case word_WeakCode:
399 case word_WhiteSpace:
400 case word_EmphSpace:
401 case word_CodeSpace:
402 case word_WkCodeSpace:
403 case word_Quote:
404 case word_EmphQuote:
405 case word_CodeQuote:
406 case word_WkCodeQuote:
407 assert(text->type != word_CodeQuote &&
408 text->type != word_WkCodeQuote);
93688997 409
7136a6c7 410 if (towordstyle(text->type) == word_Emph &&
411 (attraux(text->aux) == attr_First ||
93688997 412 attraux(text->aux) == attr_Only)) {
413 if (rs->pos > 0)
414 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 415 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 416 rdaddsc(rs, c);
417 sfree(c);
418 *state = charset_init_state;
7136a6c7 419 rdaddsc(rs, "\\fI");
93688997 420 } else if ((towordstyle(text->type) == word_Code ||
421 towordstyle(text->type) == word_WeakCode) &&
422 (attraux(text->aux) == attr_First ||
423 attraux(text->aux) == attr_Only)) {
424 if (rs->pos > 0)
425 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 426 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 427 rdaddsc(rs, c);
428 sfree(c);
429 *state = charset_init_state;
7136a6c7 430 rdaddsc(rs, "\\fB");
93688997 431 }
432
7136a6c7 433 if (removeattr(text->type) == word_Normal) {
93688997 434 charset_state s2 = *state;
435
7136a6c7 436 if (rs->pos > 0)
437 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 438 if (man_convert(text->text, 0, &c, quote_props, conf->charset, &s2) ||
93688997 439 !text->alt) {
7136a6c7 440 rdaddsc(rs, c);
93688997 441 *state = s2;
442 } else {
b0ea3acd 443 man_rdaddwc(rs, text->alt, NULL, quote_props, conf, state);
93688997 444 }
7136a6c7 445 sfree(c);
446 } else if (removeattr(text->type) == word_WhiteSpace) {
93688997 447 if (rs->pos > 0)
448 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 449 man_convert(L" ", 1, &c, quote_props, conf->charset, state);
93688997 450 rdaddsc(rs, c);
451 sfree(c);
7136a6c7 452 } else if (removeattr(text->type) == word_Quote) {
93688997 453 if (rs->pos > 0)
454 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 455 man_convert(quoteaux(text->aux) == quote_Open ?
456 conf->lquote : conf->rquote, 0,
457 &c, quote_props, conf->charset, state);
93688997 458 rdaddsc(rs, c);
459 sfree(c);
7136a6c7 460 }
93688997 461 if (towordstyle(text->type) != word_Normal &&
7136a6c7 462 (attraux(text->aux) == attr_Last ||
93688997 463 attraux(text->aux) == attr_Only)) {
464 if (rs->pos > 0)
465 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
b0ea3acd 466 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 467 rdaddsc(rs, c);
468 sfree(c);
469 *state = charset_init_state;
7136a6c7 470 rdaddsc(rs, "\\fP");
93688997 471 }
7136a6c7 472 break;
473 }
b0ea3acd 474 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
93688997 475 rdaddsc(rs, c);
476 sfree(c);
7136a6c7 477}
478
93688997 479static void man_text(FILE *fp, word *text, int newline,
b0ea3acd 480 int quote_props, manconfig *conf) {
7136a6c7 481 rdstringc t = { 0, 0, NULL };
93688997 482 charset_state state = CHARSET_INIT_STATE;
7136a6c7 483
b0ea3acd 484 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL, conf, &state);
7136a6c7 485 fprintf(fp, "%s", t.text);
486 sfree(t.text);
487 if (newline)
488 fputc('\n', fp);
489}
490
93688997 491static void man_codepara(FILE *fp, word *text, int charset) {
7136a6c7 492 fprintf(fp, ".nf\n");
493 for (; text; text = text->next) if (text->type == word_WeakCode) {
494 char *c;
4b3c5afb 495 wchar_t *t, *e;
496 int quote_props = QUOTE_INITCTRL;
497
498 t = text->text;
499 if (text->next && text->next->type == word_Emph) {
500 e = text->next->text;
501 text = text->next;
502 } else
503 e = NULL;
504
505 while (e && *e && *t) {
506 int n;
507 int ec = *e;
508
509 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
510 if (ec == 'i')
511 fprintf(fp, "\\fI");
512 else if (ec == 'b')
513 fprintf(fp, "\\fB");
93688997 514 man_convert(t, n, &c, quote_props, charset, NULL);
4b3c5afb 515 quote_props &= ~QUOTE_INITCTRL;
516 fprintf(fp, "%s", c);
517 sfree(c);
518 if (ec == 'i' || ec == 'b')
519 fprintf(fp, "\\fP");
520 t += n;
521 e += n;
522 }
93688997 523 man_convert(t, 0, &c, quote_props, charset, NULL);
7136a6c7 524 fprintf(fp, "%s\n", c);
525 sfree(c);
526 }
527 fprintf(fp, ".fi\n");
528}