It turns out that the man format requires the \& sequence (to
[sgt/halibut] / bk_man.c
1 /*
2 * man page backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include "halibut.h"
9
/*
 * Settings for the man page backend, filled in by man_configure()
 * and released by man_conf_cleanup().
 */
typedef struct {
    wchar_t *th;        /* arguments for the .TH line (`man-identity') */
    int headnumbers;    /* `man-headnumbers': prefix headings with kwtext */
    int mindepth;       /* `man-mindepth': shallowest heading depth emitted */
    char *filename;     /* output file name (`man-filename') */
    int charset;        /* output character set (`man-charset') */
    wchar_t *bullet;    /* bullet text, with fallback alternatives */
    wchar_t *lquote;    /* opening quote currently selected */
    wchar_t *rquote;    /* closing quote currently selected */
} manconfig;
18
19 static void man_text(FILE *, word *,
20 int newline, int quote_props, manconfig *conf);
21 static void man_codepara(FILE *, word *, int charset);
22 static int man_convert(wchar_t const *s, int maxlen,
23 char **result, int quote_props,
24 int charset, charset_state *state);
25
26 static manconfig man_configure(paragraph *source) {
27 paragraph *p;
28 manconfig ret;
29
30 /*
31 * Defaults.
32 */
33 ret.th = NULL;
34 ret.headnumbers = FALSE;
35 ret.mindepth = 0;
36 ret.filename = dupstr("output.1");
37 ret.charset = CS_ASCII;
38 ret.bullet = L"\x2022\0o\0\0";
39 ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
40 ret.rquote = uadv(ret.lquote);
41
42 /*
43 * Two-pass configuration so that we can pick up global config
44 * (e.g. `quotes') before having it overridden by specific
45 * config (`man-quotes'), irrespective of the order in which
46 * they occur.
47 */
48 for (p = source; p; p = p->next) {
49 if (p->type == para_Config) {
50 if (!ustricmp(p->keyword, L"quotes")) {
51 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
52 ret.lquote = uadv(p->keyword);
53 ret.rquote = uadv(ret.lquote);
54 }
55 }
56 }
57 }
58
59 for (p = source; p; p = p->next) {
60 if (p->type == para_Config) {
61 if (!ustricmp(p->keyword, L"man-identity")) {
62 wchar_t *wp, *ep;
63
64 wp = uadv(p->keyword);
65 ep = wp;
66 while (*ep)
67 ep = uadv(ep);
68 sfree(ret.th);
69 ret.th = snewn(ep - wp + 1, wchar_t);
70 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
71 } else if (!ustricmp(p->keyword, L"man-charset")) {
72 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
73 } else if (!ustricmp(p->keyword, L"man-headnumbers")) {
74 ret.headnumbers = utob(uadv(p->keyword));
75 } else if (!ustricmp(p->keyword, L"man-mindepth")) {
76 ret.mindepth = utoi(uadv(p->keyword));
77 } else if (!ustricmp(p->keyword, L"man-filename")) {
78 sfree(ret.filename);
79 ret.filename = dupstr(adv(p->origkeyword));
80 } else if (!ustricmp(p->keyword, L"man-bullet")) {
81 ret.bullet = uadv(p->keyword);
82 } else if (!ustricmp(p->keyword, L"man-quotes")) {
83 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
84 ret.lquote = uadv(p->keyword);
85 ret.rquote = uadv(ret.lquote);
86 }
87 }
88 }
89 }
90
91 /*
92 * Now process fallbacks on quote characters and bullets.
93 */
94 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
95 (!cvt_ok(ret.charset, ret.lquote) ||
96 !cvt_ok(ret.charset, ret.rquote))) {
97 ret.lquote = uadv(ret.rquote);
98 ret.rquote = uadv(ret.lquote);
99 }
100
101 while (*ret.bullet && *uadv(ret.bullet) &&
102 !cvt_ok(ret.charset, ret.bullet))
103 ret.bullet = uadv(ret.bullet);
104
105 return ret;
106 }
107
108 static void man_conf_cleanup(manconfig cf)
109 {
110 sfree(cf.th);
111 sfree(cf.filename);
112 }
113
114 paragraph *man_config_filename(char *filename)
115 {
116 return cmdline_cfg_simple("man-filename", filename, NULL);
117 }
118
/*
 * Bit flags for the `quote_props' argument of the text conversion
 * functions in this file.
 */
#define QUOTE_INITCTRL (1 << 0)  /* protect a leading . or ' with \& */
#define QUOTE_QUOTES   (1 << 1)  /* double any " (text is inside "...") */
121
122 void man_backend(paragraph *sourceform, keywordlist *keywords,
123 indexdata *idx, void *unused) {
124 paragraph *p;
125 FILE *fp;
126 manconfig conf;
127 int had_described_thing;
128
129 IGNORE(unused);
130 IGNORE(keywords);
131 IGNORE(idx);
132
133 conf = man_configure(sourceform);
134
135 /*
136 * Open the output file.
137 */
138 fp = fopen(conf.filename, "w");
139 if (!fp) {
140 error(err_cantopenw, conf.filename);
141 return;
142 }
143
144 /* Do the version ID */
145 for (p = sourceform; p; p = p->next)
146 if (p->type == para_VersionID) {
147 fprintf(fp, ".\\\" ");
148 man_text(fp, p->words, TRUE, 0, &conf);
149 }
150
151 /* .TH name-of-program manual-section */
152 fprintf(fp, ".TH");
153 if (conf.th && *conf.th) {
154 char *c;
155 wchar_t *wp;
156
157 for (wp = conf.th; *wp; wp = uadv(wp)) {
158 fputs(" \"", fp);
159 man_convert(wp, 0, &c, QUOTE_QUOTES, conf.charset, NULL);
160 fputs(c, fp);
161 sfree(c);
162 fputc('"', fp);
163 }
164 }
165 fputc('\n', fp);
166
167 fprintf(fp, ".UC\n");
168
169 had_described_thing = FALSE;
170 #define cleanup_described_thing do { \
171 if (had_described_thing) \
172 fprintf(fp, "\n"); \
173 had_described_thing = FALSE; \
174 } while (0)
175
176 for (p = sourceform; p; p = p->next) switch (p->type) {
177 /*
178 * Things we ignore because we've already processed them or
179 * aren't going to touch them in this pass.
180 */
181 case para_IM:
182 case para_BR:
183 case para_Biblio: /* only touch BiblioCited */
184 case para_VersionID:
185 case para_NoCite:
186 case para_Title:
187 break;
188
189 /*
190 * Headings.
191 */
192 case para_Chapter:
193 case para_Appendix:
194 case para_UnnumberedChapter:
195 case para_Heading:
196 case para_Subsect:
197
198 cleanup_described_thing;
199 {
200 int depth;
201 if (p->type == para_Subsect)
202 depth = p->aux + 2;
203 else if (p->type == para_Heading)
204 depth = 1;
205 else
206 depth = 0;
207 if (depth >= conf.mindepth) {
208 if (depth > conf.mindepth)
209 fprintf(fp, ".SS \"");
210 else
211 fprintf(fp, ".SH \"");
212 if (conf.headnumbers && p->kwtext) {
213 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
214 fprintf(fp, " ");
215 }
216 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
217 fprintf(fp, "\"\n");
218 }
219 break;
220 }
221
222 /*
223 * Code paragraphs.
224 */
225 case para_Code:
226 cleanup_described_thing;
227 fprintf(fp, ".PP\n");
228 man_codepara(fp, p->words, conf.charset);
229 break;
230
231 /*
232 * Normal paragraphs.
233 */
234 case para_Normal:
235 case para_Copyright:
236 cleanup_described_thing;
237 fprintf(fp, ".PP\n");
238 man_text(fp, p->words, TRUE, 0, &conf);
239 break;
240
241 /*
242 * List paragraphs.
243 */
244 case para_Description:
245 case para_BiblioCited:
246 case para_Bullet:
247 case para_NumberedList:
248 if (p->type != para_Description)
249 cleanup_described_thing;
250
251 if (p->type == para_Bullet) {
252 char *bullettext;
253 man_convert(conf.bullet, -1, &bullettext, QUOTE_QUOTES,
254 conf.charset, NULL);
255 fprintf(fp, ".IP \"\\fB%s\\fP\"\n", bullettext);
256 sfree(bullettext);
257 } else if (p->type == para_NumberedList) {
258 fprintf(fp, ".IP \"");
259 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
260 fprintf(fp, "\"\n");
261 } else if (p->type == para_Description) {
262 if (had_described_thing) {
263 /*
264 * Do nothing; the .xP for this paragraph is the
265 * .IP which has come before it in the
266 * DescribedThing.
267 */
268 } else {
269 /*
270 * A \dd without a preceding \dt is given a blank
271 * one.
272 */
273 fprintf(fp, ".IP \"\"\n");
274 }
275 } else if (p->type == para_BiblioCited) {
276 fprintf(fp, ".IP \"");
277 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
278 fprintf(fp, "\"\n");
279 }
280 man_text(fp, p->words, TRUE, 0, &conf);
281 had_described_thing = FALSE;
282 break;
283
284 case para_DescribedThing:
285 cleanup_described_thing;
286 fprintf(fp, ".IP \"");
287 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
288 fprintf(fp, "\"\n");
289 had_described_thing = TRUE;
290 break;
291
292 case para_Rule:
293 /*
294 * This isn't terribly good. Anyone who wants to do better
295 * should feel free!
296 */
297 cleanup_described_thing;
298 fprintf(fp, ".PP\n----------------------------------------\n");
299 break;
300
301 case para_LcontPush:
302 case para_QuotePush:
303 cleanup_described_thing;
304 fprintf(fp, ".RS\n");
305 break;
306 case para_LcontPop:
307 case para_QuotePop:
308 cleanup_described_thing;
309 fprintf(fp, ".RE\n");
310 break;
311 }
312 cleanup_described_thing;
313
314 /*
315 * Tidy up.
316 */
317 fclose(fp);
318 man_conf_cleanup(conf);
319 }
320
321 /*
322 * Convert a wide string into a string of chars; mallocs the
323 * resulting string and stores a pointer to it in `*result'.
324 *
325 * If `state' is non-NULL, updates the charset state pointed to. If
326 * `state' is NULL, this function uses its own state, initialises
327 * it from scratch, and cleans it up when finished. If `state' is
328 * non-NULL but _s_ is NULL, cleans up a provided state.
329 *
330 * Return is nonzero if all characters are OK. If not all
331 * characters are OK but `result' is non-NULL, a result _will_
332 * still be generated!
333 *
334 * This function also does escaping of groff special characters.
335 */
336 static int man_convert(wchar_t const *s, int maxlen,
337 char **result, int quote_props,
338 int charset, charset_state *state) {
339 charset_state internal_state = CHARSET_INIT_STATE;
340 int slen, err;
341 char *p = NULL, *q;
342 int plen = 0, psize = 0;
343 rdstringc out = {0, 0, NULL};
344
345 if (!state)
346 state = &internal_state;
347
348 slen = (s ? ustrlen(s) : 0);
349
350 if (slen > maxlen && maxlen > 0)
351 slen = maxlen;
352
353 psize = 384;
354 plen = 0;
355 p = snewn(psize, char);
356 err = 0;
357
358 while (slen > 0) {
359 int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen,
360 charset, state, (err ? NULL : &err));
361 if (ret > 0) {
362 plen += ret;
363 if (psize - plen < 256) {
364 psize = plen + 256;
365 p = sresize(p, psize, char);
366 }
367 }
368 }
369
370 if (state == &internal_state || s == NULL) {
371 int ret = charset_from_unicode(NULL, 0, p+plen, psize-plen,
372 charset, state, NULL);
373 if (ret > 0)
374 plen += ret;
375 }
376
377 for (q = p; q < p+plen; q++) {
378 if (q == p && (*q == '.' || *q == '\'') &&
379 (quote_props & QUOTE_INITCTRL)) {
380 /*
381 * Control character (. or ') at the start of a
382 * line. Quote it by putting \& (troff zero-width
383 * space) before it.
384 */
385 rdaddc(&out, '\\');
386 rdaddc(&out, '&');
387 } else if (*q == '\\') {
388 /*
389 * Quote backslashes by doubling them, always.
390 */
391 rdaddc(&out, '\\');
392 } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) {
393 /*
394 * Double quote within double quotes. Quote it by
395 * doubling.
396 */
397 rdaddc(&out, '"');
398 }
399 rdaddc(&out, *q);
400 }
401
402 sfree(p);
403
404 if (out.text)
405 *result = rdtrimc(&out);
406 else
407 *result = dupstr("");
408
409 return !err;
410 }
411
412 static int man_rdaddwc(rdstringc *rs, word *text, word *end,
413 int quote_props, manconfig *conf,
414 charset_state *state) {
415 char *c;
416
417 for (; text && text != end; text = text->next) switch (text->type) {
418 case word_HyperLink:
419 case word_HyperEnd:
420 case word_UpperXref:
421 case word_LowerXref:
422 case word_XrefEnd:
423 case word_IndexRef:
424 break;
425
426 case word_Normal:
427 case word_Emph:
428 case word_Code:
429 case word_WeakCode:
430 case word_WhiteSpace:
431 case word_EmphSpace:
432 case word_CodeSpace:
433 case word_WkCodeSpace:
434 case word_Quote:
435 case word_EmphQuote:
436 case word_CodeQuote:
437 case word_WkCodeQuote:
438 assert(text->type != word_CodeQuote &&
439 text->type != word_WkCodeQuote);
440
441 if (towordstyle(text->type) == word_Emph &&
442 (attraux(text->aux) == attr_First ||
443 attraux(text->aux) == attr_Only)) {
444 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
445 rdaddsc(rs, c);
446 if (*c)
447 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
448 sfree(c);
449 *state = charset_init_state;
450 rdaddsc(rs, "\\fI");
451 } else if ((towordstyle(text->type) == word_Code ||
452 towordstyle(text->type) == word_WeakCode) &&
453 (attraux(text->aux) == attr_First ||
454 attraux(text->aux) == attr_Only)) {
455 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
456 rdaddsc(rs, c);
457 if (*c)
458 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
459 sfree(c);
460 *state = charset_init_state;
461 rdaddsc(rs, "\\fB");
462 }
463
464 if (removeattr(text->type) == word_Normal) {
465 charset_state s2 = *state;
466
467 if (man_convert(text->text, 0, &c, quote_props, conf->charset, &s2) ||
468 !text->alt) {
469 rdaddsc(rs, c);
470 if (*c)
471 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
472 *state = s2;
473 } else {
474 quote_props = man_rdaddwc(rs, text->alt, NULL,
475 quote_props, conf, state);
476 }
477 sfree(c);
478 } else if (removeattr(text->type) == word_WhiteSpace) {
479 man_convert(L" ", 1, &c, quote_props, conf->charset, state);
480 rdaddsc(rs, c);
481 if (*c)
482 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
483 sfree(c);
484 } else if (removeattr(text->type) == word_Quote) {
485 man_convert(quoteaux(text->aux) == quote_Open ?
486 conf->lquote : conf->rquote, 0,
487 &c, quote_props, conf->charset, state);
488 rdaddsc(rs, c);
489 if (*c)
490 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
491 sfree(c);
492 }
493 if (towordstyle(text->type) != word_Normal &&
494 (attraux(text->aux) == attr_Last ||
495 attraux(text->aux) == attr_Only)) {
496 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
497 rdaddsc(rs, c);
498 if (*c)
499 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
500 sfree(c);
501 *state = charset_init_state;
502 rdaddsc(rs, "\\fP");
503 }
504 break;
505 }
506 man_convert(NULL, 0, &c, quote_props, conf->charset, state);
507 rdaddsc(rs, c);
508 if (*c)
509 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
510 sfree(c);
511
512 return quote_props;
513 }
514
515 static void man_text(FILE *fp, word *text, int newline,
516 int quote_props, manconfig *conf) {
517 rdstringc t = { 0, 0, NULL };
518 charset_state state = CHARSET_INIT_STATE;
519
520 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL, conf, &state);
521 fprintf(fp, "%s", t.text);
522 sfree(t.text);
523 if (newline)
524 fputc('\n', fp);
525 }
526
527 static void man_codepara(FILE *fp, word *text, int charset) {
528 fprintf(fp, ".nf\n");
529 for (; text; text = text->next) if (text->type == word_WeakCode) {
530 char *c;
531 wchar_t *t, *e;
532 int quote_props = QUOTE_INITCTRL;
533
534 t = text->text;
535 if (text->next && text->next->type == word_Emph) {
536 e = text->next->text;
537 text = text->next;
538 } else
539 e = NULL;
540
541 while (e && *e && *t) {
542 int n;
543 int ec = *e;
544
545 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
546 if (ec == 'i')
547 fprintf(fp, "\\fI");
548 else if (ec == 'b')
549 fprintf(fp, "\\fB");
550 man_convert(t, n, &c, quote_props, charset, NULL);
551 quote_props &= ~QUOTE_INITCTRL;
552 fprintf(fp, "%s", c);
553 sfree(c);
554 if (ec == 'i' || ec == 'b')
555 fprintf(fp, "\\fP");
556 t += n;
557 e += n;
558 }
559 man_convert(t, 0, &c, quote_props, charset, NULL);
560 fprintf(fp, "%s\n", c);
561 sfree(c);
562 }
563 fprintf(fp, ".fi\n");
564 }