Kurt Jung points out that nav menus ought to be WHLP_PARA_SCROLL,
[sgt/halibut] / bk_man.c
CommitLineData
7136a6c7 1/*
2 * man page backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
93688997 10static void man_text(FILE *, word *,
11 int newline, int quote_props, int charset);
12static void man_codepara(FILE *, word *, int charset);
13static int man_convert(wchar_t const *s, int maxlen,
14 char **result, int quote_props,
15 int charset, charset_state *state);
4b3c5afb 16
/*
 * Configuration for the man page backend, gathered from the
 * source document's config paragraphs by man_configure().
 */
typedef struct {
    /*
     * Arguments for the .TH line: a sequence of NUL-separated wide
     * strings walked with uadv(), or NULL if `man-identity' was
     * never given. Owned by this structure.
     */
    wchar_t *th;

    /* Nonzero to prefix each heading with its keyword text. */
    int headnumbers;

    /* Headings whose depth is below this value are not emitted. */
    int mindepth;

    /* Name of the output file. Owned by this structure. */
    char *filename;

    /* Character set identifier used when converting output text. */
    int charset;
} manconfig;
24
25static manconfig man_configure(paragraph *source) {
26 manconfig ret;
27
28 /*
29 * Defaults.
30 */
31 ret.th = NULL;
32 ret.headnumbers = FALSE;
33 ret.mindepth = 0;
50d6b4bd 34 ret.filename = dupstr("output.1");
93688997 35 ret.charset = CS_ASCII;
4b3c5afb 36
37 for (; source; source = source->next) {
38 if (source->type == para_Config) {
39 if (!ustricmp(source->keyword, L"man-identity")) {
40 wchar_t *wp, *ep;
41
42 wp = uadv(source->keyword);
43 ep = wp;
44 while (*ep)
45 ep = uadv(ep);
50d6b4bd 46 sfree(ret.th);
4b3c5afb 47 ret.th = mknewa(wchar_t, ep - wp + 1);
48 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
93688997 49 } else if (!ustricmp(source->keyword, L"man-charset")) {
50 char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
51 ret.charset = charset_from_localenc(csname);
52 sfree(csname);
4b3c5afb 53 } else if (!ustricmp(source->keyword, L"man-headnumbers")) {
54 ret.headnumbers = utob(uadv(source->keyword));
55 } else if (!ustricmp(source->keyword, L"man-mindepth")) {
56 ret.mindepth = utoi(uadv(source->keyword));
50d6b4bd 57 } else if (!ustricmp(source->keyword, L"man-filename")) {
58 sfree(ret.filename);
e4ea58f8 59 ret.filename = dupstr(adv(source->origkeyword));
4b3c5afb 60 }
61 }
62 }
63
64 return ret;
65}
66
67static void man_conf_cleanup(manconfig cf)
68{
69 sfree(cf.th);
50d6b4bd 70 sfree(cf.filename);
4b3c5afb 71}
7136a6c7 72
ba9c1487 73paragraph *man_config_filename(char *filename)
74{
e4ea58f8 75 return cmdline_cfg_simple("man-filename", filename, NULL);
ba9c1487 76}
77
/*
 * Bit flags for the `quote_props' arguments in this file; they may
 * be OR'd together.
 */
#define QUOTE_INITCTRL (1 << 0) /* quote initial . and ' on a line */
#define QUOTE_QUOTES   (1 << 1) /* quote double quotes by doubling them */
80
81void man_backend(paragraph *sourceform, keywordlist *keywords,
43341922 82 indexdata *idx, void *unused) {
7136a6c7 83 paragraph *p;
84 FILE *fp;
4b3c5afb 85 manconfig conf;
7136a6c7 86
43341922 87 IGNORE(unused);
88 IGNORE(keywords);
89 IGNORE(idx);
7136a6c7 90
4b3c5afb 91 conf = man_configure(sourceform);
92
7136a6c7 93 /*
50d6b4bd 94 * Open the output file.
7136a6c7 95 */
50d6b4bd 96 fp = fopen(conf.filename, "w");
7136a6c7 97 if (!fp) {
50d6b4bd 98 error(err_cantopenw, conf.filename);
7136a6c7 99 return;
100 }
101
102 /* Do the version ID */
103 for (p = sourceform; p; p = p->next)
104 if (p->type == para_VersionID) {
105 fprintf(fp, ".\\\" ");
93688997 106 man_text(fp, p->words, TRUE, 0, conf.charset);
7136a6c7 107 }
108
4b3c5afb 109 /* .TH name-of-program manual-section */
22905f72 110 fprintf(fp, ".TH");
111 if (conf.th && *conf.th) {
4b3c5afb 112 char *c;
22905f72 113 wchar_t *wp;
114
115 for (wp = conf.th; *wp; wp = uadv(wp)) {
116 fputs(" \"", fp);
93688997 117 man_convert(wp, 0, &c, QUOTE_QUOTES, conf.charset, NULL);
22905f72 118 fputs(c, fp);
119 sfree(c);
120 fputc('"', fp);
4b3c5afb 121 }
122 }
22905f72 123 fputc('\n', fp);
7136a6c7 124
125 fprintf(fp, ".UC\n");
126
7136a6c7 127 for (p = sourceform; p; p = p->next) switch (p->type) {
128 /*
129 * Things we ignore because we've already processed them or
130 * aren't going to touch them in this pass.
131 */
132 case para_IM:
133 case para_BR:
134 case para_Biblio: /* only touch BiblioCited */
135 case para_VersionID:
7136a6c7 136 case para_NoCite:
137 case para_Title:
138 break;
139
140 /*
141 * Headings.
142 */
143 case para_Chapter:
144 case para_Appendix:
145 case para_UnnumberedChapter:
146 case para_Heading:
147 case para_Subsect:
8902e0ed 148
4b3c5afb 149 {
150 int depth;
151 if (p->type == para_Subsect)
152 depth = p->aux + 2;
153 else if (p->type == para_Heading)
154 depth = 1;
155 else
156 depth = 0;
157 if (depth >= conf.mindepth) {
158 fprintf(fp, ".SH \"");
159 if (conf.headnumbers && p->kwtext) {
93688997 160 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, conf.charset);
4b3c5afb 161 fprintf(fp, " ");
162 }
93688997 163 man_text(fp, p->words, FALSE, QUOTE_QUOTES, conf.charset);
4b3c5afb 164 fprintf(fp, "\"\n");
165 }
166 break;
167 }
7136a6c7 168
169 /*
170 * Code paragraphs.
171 */
172 case para_Code:
173 fprintf(fp, ".PP\n");
93688997 174 man_codepara(fp, p->words, conf.charset);
7136a6c7 175 break;
176
177 /*
178 * Normal paragraphs.
179 */
180 case para_Normal:
9057a0a8 181 case para_Copyright:
7136a6c7 182 fprintf(fp, ".PP\n");
93688997 183 man_text(fp, p->words, TRUE, 0, conf.charset);
7136a6c7 184 break;
185
186 /*
187 * List paragraphs.
188 */
189 case para_Description:
190 case para_BiblioCited:
191 case para_Bullet:
192 case para_NumberedList:
193 if (p->type == para_Bullet) {
194 fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
195 } else if (p->type == para_NumberedList) {
196 fprintf(fp, ".IP \"");
93688997 197 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, conf.charset);
7136a6c7 198 fprintf(fp, "\"\n");
199 } else if (p->type == para_Description) {
200 /*
201 * Do nothing; the .xP for this paragraph is the .IP
202 * which has come before it in the DescribedThing.
203 */
204 } else if (p->type == para_BiblioCited) {
205 fprintf(fp, ".IP \"");
93688997 206 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, conf.charset);
7136a6c7 207 fprintf(fp, "\"\n");
208 }
93688997 209 man_text(fp, p->words, TRUE, 0, conf.charset);
7136a6c7 210 break;
211
212 case para_DescribedThing:
213 fprintf(fp, ".IP \"");
93688997 214 man_text(fp, p->words, FALSE, QUOTE_QUOTES, conf.charset);
7136a6c7 215 fprintf(fp, "\"\n");
216 break;
217
218 case para_Rule:
219 /*
4b3c5afb 220 * This isn't terribly good. Anyone who wants to do better
221 * should feel free!
7136a6c7 222 */
4b3c5afb 223 fprintf(fp, ".PP\n----------------------------------------\n");
7136a6c7 224 break;
225
226 case para_LcontPush:
2614b01d 227 case para_QuotePush:
7136a6c7 228 fprintf(fp, ".RS\n");
229 break;
230 case para_LcontPop:
2614b01d 231 case para_QuotePop:
7136a6c7 232 fprintf(fp, ".RE\n");
233 break;
234 }
235
236 /*
237 * Tidy up.
238 */
239 fclose(fp);
4b3c5afb 240 man_conf_cleanup(conf);
7136a6c7 241}
242
243/*
93688997 244 * Convert a wide string into a string of chars; mallocs the
245 * resulting string and stores a pointer to it in `*result'.
246 *
247 * If `state' is non-NULL, updates the charset state pointed to. If
248 * `state' is NULL, this function uses its own state, initialises
249 * it from scratch, and cleans it up when finished. If `state' is
250 * non-NULL but _s_ is NULL, cleans up a provided state.
7136a6c7 251 *
252 * Return is nonzero if all characters are OK. If not all
253 * characters are OK but `result' is non-NULL, a result _will_
254 * still be generated!
255 *
93688997 256 * This function also does escaping of groff special characters.
7136a6c7 257 */
93688997 258static int man_convert(wchar_t const *s, int maxlen,
259 char **result, int quote_props,
260 int charset, charset_state *state) {
261 charset_state internal_state = CHARSET_INIT_STATE;
262 int slen, err;
263 char *p = NULL, *q;
7136a6c7 264 int plen = 0, psize = 0;
93688997 265 rdstringc out = {0, 0, NULL};
7136a6c7 266
93688997 267 if (!state)
268 state = &internal_state;
269
270 slen = (s ? ustrlen(s) : 0);
271
272 if (slen > maxlen && maxlen > 0)
273 slen = maxlen;
274
275 psize = 384;
276 plen = 0;
277 p = mknewa(char, psize);
278 err = 0;
279
280 while (slen > 0) {
281 int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen,
282 charset, state, (err ? NULL : &err));
283 if (ret > 0) {
284 plen += ret;
285 if (psize - plen < 256) {
7136a6c7 286 psize = plen + 256;
287 p = resize(p, psize);
288 }
7136a6c7 289 }
290 }
93688997 291
292 if (state == &internal_state || s == NULL) {
293 int ret = charset_from_unicode(NULL, 0, p+plen, psize-plen,
294 charset, state, NULL);
295 if (ret > 0)
296 plen += ret;
7136a6c7 297 }
93688997 298
299 for (q = p; q < p+plen; q++) {
300 if (q == p && (*q == '.' || *q == '\'') &&
301 (quote_props & QUOTE_INITCTRL)) {
302 /*
303 * Control character (. or ') at the start of a
304 * line. Quote it by putting \& (troff zero-width
305 * space) before it.
306 */
307 rdaddc(&out, '\\');
308 rdaddc(&out, '&');
309 } else if (*q == '\\') {
310 /*
311 * Quote backslashes by doubling them, always.
312 */
313 rdaddc(&out, '\\');
314 } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) {
315 /*
316 * Double quote within double quotes. Quote it by
317 * doubling.
318 */
319 rdaddc(&out, '"');
320 }
321 rdaddc(&out, *q);
322 }
323
324 sfree(p);
325
326 if (out.text)
327 *result = rdtrimc(&out);
328 else
329 *result = dupstr("");
330
331 return !err;
7136a6c7 332}
333
334static void man_rdaddwc(rdstringc *rs, word *text, word *end,
93688997 335 int quote_props, int charset, charset_state *state) {
7136a6c7 336 char *c;
337
338 for (; text && text != end; text = text->next) switch (text->type) {
339 case word_HyperLink:
340 case word_HyperEnd:
341 case word_UpperXref:
342 case word_LowerXref:
343 case word_XrefEnd:
344 case word_IndexRef:
345 break;
346
347 case word_Normal:
348 case word_Emph:
349 case word_Code:
350 case word_WeakCode:
351 case word_WhiteSpace:
352 case word_EmphSpace:
353 case word_CodeSpace:
354 case word_WkCodeSpace:
355 case word_Quote:
356 case word_EmphQuote:
357 case word_CodeQuote:
358 case word_WkCodeQuote:
359 assert(text->type != word_CodeQuote &&
360 text->type != word_WkCodeQuote);
93688997 361
7136a6c7 362 if (towordstyle(text->type) == word_Emph &&
363 (attraux(text->aux) == attr_First ||
93688997 364 attraux(text->aux) == attr_Only)) {
365 if (rs->pos > 0)
366 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
367 man_convert(NULL, 0, &c, quote_props, charset, state);
368 rdaddsc(rs, c);
369 sfree(c);
370 *state = charset_init_state;
7136a6c7 371 rdaddsc(rs, "\\fI");
93688997 372 } else if ((towordstyle(text->type) == word_Code ||
373 towordstyle(text->type) == word_WeakCode) &&
374 (attraux(text->aux) == attr_First ||
375 attraux(text->aux) == attr_Only)) {
376 if (rs->pos > 0)
377 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
378 man_convert(NULL, 0, &c, quote_props, charset, state);
379 rdaddsc(rs, c);
380 sfree(c);
381 *state = charset_init_state;
7136a6c7 382 rdaddsc(rs, "\\fB");
93688997 383 }
384
7136a6c7 385 if (removeattr(text->type) == word_Normal) {
93688997 386 charset_state s2 = *state;
387
7136a6c7 388 if (rs->pos > 0)
389 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
93688997 390 if (man_convert(text->text, 0, &c, quote_props, charset, &s2) ||
391 !text->alt) {
7136a6c7 392 rdaddsc(rs, c);
93688997 393 *state = s2;
394 } else {
395 man_rdaddwc(rs, text->alt, NULL, quote_props, charset, state);
396 }
7136a6c7 397 sfree(c);
398 } else if (removeattr(text->type) == word_WhiteSpace) {
93688997 399 if (rs->pos > 0)
400 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
401 man_convert(L" ", 1, &c, quote_props, charset, state);
402 rdaddsc(rs, c);
403 sfree(c);
7136a6c7 404 } else if (removeattr(text->type) == word_Quote) {
93688997 405 if (rs->pos > 0)
406 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
407 man_convert(L"\"", 1, &c, quote_props, charset, state);
408 rdaddsc(rs, c);
409 sfree(c);
7136a6c7 410 }
93688997 411 if (towordstyle(text->type) != word_Normal &&
7136a6c7 412 (attraux(text->aux) == attr_Last ||
93688997 413 attraux(text->aux) == attr_Only)) {
414 if (rs->pos > 0)
415 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
416 man_convert(NULL, 0, &c, quote_props, charset, state);
417 rdaddsc(rs, c);
418 sfree(c);
419 *state = charset_init_state;
7136a6c7 420 rdaddsc(rs, "\\fP");
93688997 421 }
7136a6c7 422 break;
423 }
93688997 424 man_convert(NULL, 0, &c, quote_props, charset, state);
425 rdaddsc(rs, c);
426 sfree(c);
7136a6c7 427}
428
93688997 429static void man_text(FILE *fp, word *text, int newline,
430 int quote_props, int charset) {
7136a6c7 431 rdstringc t = { 0, 0, NULL };
93688997 432 charset_state state = CHARSET_INIT_STATE;
7136a6c7 433
93688997 434 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL, charset, &state);
7136a6c7 435 fprintf(fp, "%s", t.text);
436 sfree(t.text);
437 if (newline)
438 fputc('\n', fp);
439}
440
93688997 441static void man_codepara(FILE *fp, word *text, int charset) {
7136a6c7 442 fprintf(fp, ".nf\n");
443 for (; text; text = text->next) if (text->type == word_WeakCode) {
444 char *c;
4b3c5afb 445 wchar_t *t, *e;
446 int quote_props = QUOTE_INITCTRL;
447
448 t = text->text;
449 if (text->next && text->next->type == word_Emph) {
450 e = text->next->text;
451 text = text->next;
452 } else
453 e = NULL;
454
455 while (e && *e && *t) {
456 int n;
457 int ec = *e;
458
459 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
460 if (ec == 'i')
461 fprintf(fp, "\\fI");
462 else if (ec == 'b')
463 fprintf(fp, "\\fB");
93688997 464 man_convert(t, n, &c, quote_props, charset, NULL);
4b3c5afb 465 quote_props &= ~QUOTE_INITCTRL;
466 fprintf(fp, "%s", c);
467 sfree(c);
468 if (ec == 'i' || ec == 'b')
469 fprintf(fp, "\\fP");
470 t += n;
471 e += n;
472 }
93688997 473 man_convert(t, 0, &c, quote_props, charset, NULL);
7136a6c7 474 fprintf(fp, "%s\n", c);
475 sfree(c);
476 }
477 fprintf(fp, ".fi\n");
478}