Sort out error handling everywhere a charset name is converted into
[sgt/halibut] / bk_man.c
1 /*
2 * man page backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include "halibut.h"
9
/*
 * Settings for the man page backend, filled in by man_configure()
 * and passed to the output routines.
 */
typedef struct {
    wchar_t *th;        /* strings for the .TH line (allocated), or NULL */
    int headnumbers;    /* if true, headings are prefixed with their kwtext */
    int mindepth;       /* headings with a smaller depth are not output */
    char *filename;     /* name of the output file (allocated) */
    int charset;        /* character set the output is converted into */
    wchar_t *bullet;    /* bullet text, followed by fallback alternatives */
    wchar_t *lquote;    /* text used for an opening quote */
    wchar_t *rquote;    /* text used for a closing quote */
} manconfig;
18
19 static void man_text(FILE *, word *,
20 int newline, int quote_props, manconfig *conf);
21 static void man_codepara(FILE *, word *, int charset);
22 static int man_convert(wchar_t const *s, int maxlen,
23 char **result, int quote_props,
24 int charset, charset_state *state);
25
26 static manconfig man_configure(paragraph *source) {
27 paragraph *p;
28 manconfig ret;
29
30 /*
31 * Defaults.
32 */
33 ret.th = NULL;
34 ret.headnumbers = FALSE;
35 ret.mindepth = 0;
36 ret.filename = dupstr("output.1");
37 ret.charset = CS_ASCII;
38 ret.bullet = L"\x2022\0o\0\0";
39 ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
40 ret.rquote = uadv(ret.lquote);
41
42 /*
43 * Two-pass configuration so that we can pick up global config
44 * (e.g. `quotes') before having it overridden by specific
45 * config (`man-quotes'), irrespective of the order in which
46 * they occur.
47 */
48 for (p = source; p; p = p->next) {
49 if (p->type == para_Config) {
50 if (!ustricmp(p->keyword, L"quotes")) {
51 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
52 ret.lquote = uadv(p->keyword);
53 ret.rquote = uadv(ret.lquote);
54 }
55 }
56 }
57 }
58
59 for (p = source; p; p = p->next) {
60 if (p->type == para_Config) {
61 if (!ustricmp(p->keyword, L"man-identity")) {
62 wchar_t *wp, *ep;
63
64 wp = uadv(p->keyword);
65 ep = wp;
66 while (*ep)
67 ep = uadv(ep);
68 sfree(ret.th);
69 ret.th = snewn(ep - wp + 1, wchar_t);
70 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
71 } else if (!ustricmp(p->keyword, L"man-charset")) {
72 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
73 } else if (!ustricmp(p->keyword, L"man-headnumbers")) {
74 ret.headnumbers = utob(uadv(p->keyword));
75 } else if (!ustricmp(p->keyword, L"man-mindepth")) {
76 ret.mindepth = utoi(uadv(p->keyword));
77 } else if (!ustricmp(p->keyword, L"man-filename")) {
78 sfree(ret.filename);
79 ret.filename = dupstr(adv(p->origkeyword));
80 } else if (!ustricmp(p->keyword, L"man-bullet")) {
81 ret.bullet = uadv(p->keyword);
82 } else if (!ustricmp(p->keyword, L"man-quotes")) {
83 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
84 ret.lquote = uadv(p->keyword);
85 ret.rquote = uadv(ret.lquote);
86 }
87 }
88 }
89 }
90
91 /*
92 * Now process fallbacks on quote characters and bullets.
93 */
94 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
95 (!cvt_ok(ret.charset, ret.lquote) ||
96 !cvt_ok(ret.charset, ret.rquote))) {
97 ret.lquote = uadv(ret.rquote);
98 ret.rquote = uadv(ret.lquote);
99 }
100
101 while (*ret.bullet && *uadv(ret.bullet) &&
102 !cvt_ok(ret.charset, ret.bullet))
103 ret.bullet = uadv(ret.bullet);
104
105 return ret;
106 }
107
108 static void man_conf_cleanup(manconfig cf)
109 {
110 sfree(cf.th);
111 sfree(cf.filename);
112 }
113
114 paragraph *man_config_filename(char *filename)
115 {
116 return cmdline_cfg_simple("man-filename", filename, NULL);
117 }
118
/*
 * Flag bits for the `quote_props' arguments used in this file. They
 * are combined with | and tested with &.
 */
#define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */
#define QUOTE_QUOTES 2 /* quote double quotes by doubling them */
121
122 void man_backend(paragraph *sourceform, keywordlist *keywords,
123 indexdata *idx, void *unused) {
124 paragraph *p;
125 FILE *fp;
126 manconfig conf;
127
128 IGNORE(unused);
129 IGNORE(keywords);
130 IGNORE(idx);
131
132 conf = man_configure(sourceform);
133
134 /*
135 * Open the output file.
136 */
137 fp = fopen(conf.filename, "w");
138 if (!fp) {
139 error(err_cantopenw, conf.filename);
140 return;
141 }
142
143 /* Do the version ID */
144 for (p = sourceform; p; p = p->next)
145 if (p->type == para_VersionID) {
146 fprintf(fp, ".\\\" ");
147 man_text(fp, p->words, TRUE, 0, &conf);
148 }
149
150 /* .TH name-of-program manual-section */
151 fprintf(fp, ".TH");
152 if (conf.th && *conf.th) {
153 char *c;
154 wchar_t *wp;
155
156 for (wp = conf.th; *wp; wp = uadv(wp)) {
157 fputs(" \"", fp);
158 man_convert(wp, 0, &c, QUOTE_QUOTES, conf.charset, NULL);
159 fputs(c, fp);
160 sfree(c);
161 fputc('"', fp);
162 }
163 }
164 fputc('\n', fp);
165
166 fprintf(fp, ".UC\n");
167
168 for (p = sourceform; p; p = p->next) switch (p->type) {
169 /*
170 * Things we ignore because we've already processed them or
171 * aren't going to touch them in this pass.
172 */
173 case para_IM:
174 case para_BR:
175 case para_Biblio: /* only touch BiblioCited */
176 case para_VersionID:
177 case para_NoCite:
178 case para_Title:
179 break;
180
181 /*
182 * Headings.
183 */
184 case para_Chapter:
185 case para_Appendix:
186 case para_UnnumberedChapter:
187 case para_Heading:
188 case para_Subsect:
189
190 {
191 int depth;
192 if (p->type == para_Subsect)
193 depth = p->aux + 2;
194 else if (p->type == para_Heading)
195 depth = 1;
196 else
197 depth = 0;
198 if (depth >= conf.mindepth) {
199 fprintf(fp, ".SH \"");
200 if (conf.headnumbers && p->kwtext) {
201 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
202 fprintf(fp, " ");
203 }
204 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
205 fprintf(fp, "\"\n");
206 }
207 break;
208 }
209
210 /*
211 * Code paragraphs.
212 */
213 case para_Code:
214 fprintf(fp, ".PP\n");
215 man_codepara(fp, p->words, conf.charset);
216 break;
217
218 /*
219 * Normal paragraphs.
220 */
221 case para_Normal:
222 case para_Copyright:
223 fprintf(fp, ".PP\n");
224 man_text(fp, p->words, TRUE, 0, &conf);
225 break;
226
227 /*
228 * List paragraphs.
229 */
230 case para_Description:
231 case para_BiblioCited:
232 case para_Bullet:
233 case para_NumberedList:
234 if (p->type == para_Bullet) {
235 char *bullettext;
236 man_convert(conf.bullet, -1, &bullettext, QUOTE_QUOTES,
237 conf.charset, NULL);
238 fprintf(fp, ".IP \"\\fB%s\\fP\"\n", bullettext);
239 sfree(bullettext);
240 } else if (p->type == para_NumberedList) {
241 fprintf(fp, ".IP \"");
242 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
243 fprintf(fp, "\"\n");
244 } else if (p->type == para_Description) {
245 /*
246 * Do nothing; the .xP for this paragraph is the .IP
247 * which has come before it in the DescribedThing.
248 */
249 } else if (p->type == para_BiblioCited) {
250 fprintf(fp, ".IP \"");
251 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
252 fprintf(fp, "\"\n");
253 }
254 man_text(fp, p->words, TRUE, 0, &conf);
255 break;
256
257 case para_DescribedThing:
258 fprintf(fp, ".IP \"");
259 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
260 fprintf(fp, "\"\n");
261 break;
262
263 case para_Rule:
264 /*
265 * This isn't terribly good. Anyone who wants to do better
266 * should feel free!
267 */
268 fprintf(fp, ".PP\n----------------------------------------\n");
269 break;
270
271 case para_LcontPush:
272 case para_QuotePush:
273 fprintf(fp, ".RS\n");
274 break;
275 case para_LcontPop:
276 case para_QuotePop:
277 fprintf(fp, ".RE\n");
278 break;
279 }
280
281 /*
282 * Tidy up.
283 */
284 fclose(fp);
285 man_conf_cleanup(conf);
286 }
287
288 /*
289 * Convert a wide string into a string of chars; mallocs the
290 * resulting string and stores a pointer to it in `*result'.
291 *
292 * If `state' is non-NULL, updates the charset state pointed to. If
293 * `state' is NULL, this function uses its own state, initialises
294 * it from scratch, and cleans it up when finished. If `state' is
295 * non-NULL but _s_ is NULL, cleans up a provided state.
296 *
297 * Return is nonzero if all characters are OK. If not all
298 * characters are OK but `result' is non-NULL, a result _will_
299 * still be generated!
300 *
301 * This function also does escaping of groff special characters.
302 */
303 static int man_convert(wchar_t const *s, int maxlen,
304 char **result, int quote_props,
305 int charset, charset_state *state) {
306 charset_state internal_state = CHARSET_INIT_STATE;
307 int slen, err;
308 char *p = NULL, *q;
309 int plen = 0, psize = 0;
310 rdstringc out = {0, 0, NULL};
311
312 if (!state)
313 state = &internal_state;
314
315 slen = (s ? ustrlen(s) : 0);
316
317 if (slen > maxlen && maxlen > 0)
318 slen = maxlen;
319
320 psize = 384;
321 plen = 0;
322 p = snewn(psize, char);
323 err = 0;
324
325 while (slen > 0) {
326 int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen,
327 charset, state, (err ? NULL : &err));
328 if (ret > 0) {
329 plen += ret;
330 if (psize - plen < 256) {
331 psize = plen + 256;
332 p = sresize(p, psize, char);
333 }
334 }
335 }
336
337 if (state == &internal_state || s == NULL) {
338 int ret = charset_from_unicode(NULL, 0, p+plen, psize-plen,
339 charset, state, NULL);
340 if (ret > 0)
341 plen += ret;
342 }
343
344 for (q = p; q < p+plen; q++) {
345 if (q == p && (*q == '.' || *q == '\'') &&
346 (quote_props & QUOTE_INITCTRL)) {
347 /*
348 * Control character (. or ') at the start of a
349 * line. Quote it by putting \& (troff zero-width
350 * space) before it.
351 */
352 rdaddc(&out, '\\');
353 rdaddc(&out, '&');
354 } else if (*q == '\\') {
355 /*
356 * Quote backslashes by doubling them, always.
357 */
358 rdaddc(&out, '\\');
359 } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) {
360 /*
361 * Double quote within double quotes. Quote it by
362 * doubling.
363 */
364 rdaddc(&out, '"');
365 }
366 rdaddc(&out, *q);
367 }
368
369 sfree(p);
370
371 if (out.text)
372 *result = rdtrimc(&out);
373 else
374 *result = dupstr("");
375
376 return !err;
377 }
378
379 static void man_rdaddwc(rdstringc *rs, word *text, word *end,
380 int quote_props, manconfig *conf,
381 charset_state *state) {
382 char *c;
383
384 for (; text && text != end; text = text->next) switch (text->type) {
385 case word_HyperLink:
386 case word_HyperEnd:
387 case word_UpperXref:
388 case word_LowerXref:
389 case word_XrefEnd:
390 case word_IndexRef:
391 break;
392
393 case word_Normal:
394 case word_Emph:
395 case word_Code:
396 case word_WeakCode:
397 case word_WhiteSpace:
398 case word_EmphSpace:
399 case word_CodeSpace:
400 case word_WkCodeSpace:
401 case word_Quote:
402 case word_EmphQuote:
403 case word_CodeQuote:
404 case word_WkCodeQuote:
405 assert(text->type != word_CodeQuote &&
406 text->type != word_WkCodeQuote);
407
408 if (towordstyle(text->type) == word_Emph &&
409 (attraux(text->aux) == attr_First ||
410 attraux(text->aux) == attr_Only)) {
411 if (rs->pos > 0)
412 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
413 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
414 rdaddsc(rs, c);
415 sfree(c);
416 *state = charset_init_state;
417 rdaddsc(rs, "\\fI");
418 } else if ((towordstyle(text->type) == word_Code ||
419 towordstyle(text->type) == word_WeakCode) &&
420 (attraux(text->aux) == attr_First ||
421 attraux(text->aux) == attr_Only)) {
422 if (rs->pos > 0)
423 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
424 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
425 rdaddsc(rs, c);
426 sfree(c);
427 *state = charset_init_state;
428 rdaddsc(rs, "\\fB");
429 }
430
431 if (removeattr(text->type) == word_Normal) {
432 charset_state s2 = *state;
433
434 if (rs->pos > 0)
435 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
436 if (man_convert(text->text, 0, &c, quote_props, conf->charset, &s2) ||
437 !text->alt) {
438 rdaddsc(rs, c);
439 *state = s2;
440 } else {
441 man_rdaddwc(rs, text->alt, NULL, quote_props, conf, state);
442 }
443 sfree(c);
444 } else if (removeattr(text->type) == word_WhiteSpace) {
445 if (rs->pos > 0)
446 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
447 man_convert(L" ", 1, &c, quote_props, conf->charset, state);
448 rdaddsc(rs, c);
449 sfree(c);
450 } else if (removeattr(text->type) == word_Quote) {
451 if (rs->pos > 0)
452 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
453 man_convert(quoteaux(text->aux) == quote_Open ?
454 conf->lquote : conf->rquote, 0,
455 &c, quote_props, conf->charset, state);
456 rdaddsc(rs, c);
457 sfree(c);
458 }
459 if (towordstyle(text->type) != word_Normal &&
460 (attraux(text->aux) == attr_Last ||
461 attraux(text->aux) == attr_Only)) {
462 if (rs->pos > 0)
463 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
464 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
465 rdaddsc(rs, c);
466 sfree(c);
467 *state = charset_init_state;
468 rdaddsc(rs, "\\fP");
469 }
470 break;
471 }
472 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
473 rdaddsc(rs, c);
474 sfree(c);
475 }
476
477 static void man_text(FILE *fp, word *text, int newline,
478 int quote_props, manconfig *conf) {
479 rdstringc t = { 0, 0, NULL };
480 charset_state state = CHARSET_INIT_STATE;
481
482 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL, conf, &state);
483 fprintf(fp, "%s", t.text);
484 sfree(t.text);
485 if (newline)
486 fputc('\n', fp);
487 }
488
489 static void man_codepara(FILE *fp, word *text, int charset) {
490 fprintf(fp, ".nf\n");
491 for (; text; text = text->next) if (text->type == word_WeakCode) {
492 char *c;
493 wchar_t *t, *e;
494 int quote_props = QUOTE_INITCTRL;
495
496 t = text->text;
497 if (text->next && text->next->type == word_Emph) {
498 e = text->next->text;
499 text = text->next;
500 } else
501 e = NULL;
502
503 while (e && *e && *t) {
504 int n;
505 int ec = *e;
506
507 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
508 if (ec == 'i')
509 fprintf(fp, "\\fI");
510 else if (ec == 'b')
511 fprintf(fp, "\\fB");
512 man_convert(t, n, &c, quote_props, charset, NULL);
513 quote_props &= ~QUOTE_INITCTRL;
514 fprintf(fp, "%s", c);
515 sfree(c);
516 if (ec == 'i' || ec == 'b')
517 fprintf(fp, "\\fP");
518 t += n;
519 e += n;
520 }
521 man_convert(t, 0, &c, quote_props, charset, NULL);
522 fprintf(fp, "%s\n", c);
523 sfree(c);
524 }
525 fprintf(fp, ".fi\n");
526 }