Double yikes! I meant to check in just biblio.c, but instead I
[sgt/halibut] / bk_man.c
CommitLineData
7136a6c7 1/*
2 * man page backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
10static void man_text(FILE *, word *, int newline, int quote_props);
11static void man_codepara(FILE *, word *);
4b3c5afb 12static int man_convert(wchar_t *s, int maxlen,
13 char **result, int quote_props);
14
/*
 * Configuration gathered from the document's config paragraphs for
 * the man page backend. `th' and `filename' are dynamically
 * allocated and are released by man_conf_cleanup().
 */
typedef struct manconfig {
    wchar_t *th;        /* fields for the .TH line: several strings back
                         * to back, ended by an empty one; NULL if unset */
    int headnumbers;    /* nonzero: prefix headings with their kwtext */
    int mindepth;       /* headings shallower than this are not output */
    char *filename;     /* name of the output file */
} manconfig;
21
22static manconfig man_configure(paragraph *source) {
23 manconfig ret;
24
25 /*
26 * Defaults.
27 */
28 ret.th = NULL;
29 ret.headnumbers = FALSE;
30 ret.mindepth = 0;
50d6b4bd 31 ret.filename = dupstr("output.1");
4b3c5afb 32
33 for (; source; source = source->next) {
34 if (source->type == para_Config) {
35 if (!ustricmp(source->keyword, L"man-identity")) {
36 wchar_t *wp, *ep;
37
38 wp = uadv(source->keyword);
39 ep = wp;
40 while (*ep)
41 ep = uadv(ep);
50d6b4bd 42 sfree(ret.th);
4b3c5afb 43 ret.th = mknewa(wchar_t, ep - wp + 1);
44 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
45 } else if (!ustricmp(source->keyword, L"man-headnumbers")) {
46 ret.headnumbers = utob(uadv(source->keyword));
47 } else if (!ustricmp(source->keyword, L"man-mindepth")) {
48 ret.mindepth = utoi(uadv(source->keyword));
50d6b4bd 49 } else if (!ustricmp(source->keyword, L"man-filename")) {
50 sfree(ret.filename);
51 ret.filename = utoa_dup(uadv(source->keyword));
4b3c5afb 52 }
53 }
54 }
55
56 return ret;
57}
58
59static void man_conf_cleanup(manconfig cf)
60{
61 sfree(cf.th);
50d6b4bd 62 sfree(cf.filename);
4b3c5afb 63}
7136a6c7 64
ba9c1487 65paragraph *man_config_filename(char *filename)
66{
67 paragraph *p;
68 wchar_t *ufilename, *up;
69 int len;
70
71 p = mknew(paragraph);
72 memset(p, 0, sizeof(*p));
73 p->type = para_Config;
74 p->next = NULL;
75 p->fpos.filename = "<command line>";
76 p->fpos.line = p->fpos.col = -1;
77
78 ufilename = ufroma_dup(filename);
79 len = ustrlen(ufilename) + 2 + lenof(L"man-filename");
80 p->keyword = mknewa(wchar_t, len);
81 up = p->keyword;
82 ustrcpy(up, L"man-filename");
83 up = uadv(up);
84 ustrcpy(up, ufilename);
85 up = uadv(up);
86 *up = L'\0';
87 assert(up - p->keyword < len);
88 sfree(ufilename);
89
90 return p;
91}
92
/*
 * Bit flags for the `quote_props' arguments in this file; they may be
 * OR-ed together.
 */
#define QUOTE_INITCTRL 0x01  /* protect a leading . or ' with \& */
#define QUOTE_QUOTES   0x02  /* write each double quote twice */
95
96void man_backend(paragraph *sourceform, keywordlist *keywords,
e62b3302 97 indexdata *idx) {
7136a6c7 98 paragraph *p;
99 FILE *fp;
4b3c5afb 100 manconfig conf;
7136a6c7 101
e62b3302 102 IGNORE(keywords); /* we don't happen to need this */
103 IGNORE(idx); /* or this */
7136a6c7 104
4b3c5afb 105 conf = man_configure(sourceform);
106
7136a6c7 107 /*
50d6b4bd 108 * Open the output file.
7136a6c7 109 */
50d6b4bd 110 fp = fopen(conf.filename, "w");
7136a6c7 111 if (!fp) {
50d6b4bd 112 error(err_cantopenw, conf.filename);
7136a6c7 113 return;
114 }
115
116 /* Do the version ID */
117 for (p = sourceform; p; p = p->next)
118 if (p->type == para_VersionID) {
119 fprintf(fp, ".\\\" ");
120 man_text(fp, p->words, TRUE, 0);
121 }
122
4b3c5afb 123 /* .TH name-of-program manual-section */
22905f72 124 fprintf(fp, ".TH");
125 if (conf.th && *conf.th) {
4b3c5afb 126 char *c;
22905f72 127 wchar_t *wp;
128
129 for (wp = conf.th; *wp; wp = uadv(wp)) {
130 fputs(" \"", fp);
131 man_convert(wp, 0, &c, QUOTE_QUOTES);
132 fputs(c, fp);
133 sfree(c);
134 fputc('"', fp);
4b3c5afb 135 }
136 }
22905f72 137 fputc('\n', fp);
7136a6c7 138
139 fprintf(fp, ".UC\n");
140
7136a6c7 141 for (p = sourceform; p; p = p->next) switch (p->type) {
142 /*
143 * Things we ignore because we've already processed them or
144 * aren't going to touch them in this pass.
145 */
146 case para_IM:
147 case para_BR:
148 case para_Biblio: /* only touch BiblioCited */
149 case para_VersionID:
7136a6c7 150 case para_NoCite:
151 case para_Title:
152 break;
153
154 /*
155 * Headings.
156 */
157 case para_Chapter:
158 case para_Appendix:
159 case para_UnnumberedChapter:
160 case para_Heading:
161 case para_Subsect:
8902e0ed 162
4b3c5afb 163 {
164 int depth;
165 if (p->type == para_Subsect)
166 depth = p->aux + 2;
167 else if (p->type == para_Heading)
168 depth = 1;
169 else
170 depth = 0;
171 if (depth >= conf.mindepth) {
172 fprintf(fp, ".SH \"");
173 if (conf.headnumbers && p->kwtext) {
174 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
175 fprintf(fp, " ");
176 }
177 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
178 fprintf(fp, "\"\n");
179 }
180 break;
181 }
7136a6c7 182
183 /*
184 * Code paragraphs.
185 */
186 case para_Code:
187 fprintf(fp, ".PP\n");
188 man_codepara(fp, p->words);
189 break;
190
191 /*
192 * Normal paragraphs.
193 */
194 case para_Normal:
9057a0a8 195 case para_Copyright:
7136a6c7 196 fprintf(fp, ".PP\n");
197 man_text(fp, p->words, TRUE, 0);
198 break;
199
200 /*
201 * List paragraphs.
202 */
203 case para_Description:
204 case para_BiblioCited:
205 case para_Bullet:
206 case para_NumberedList:
207 if (p->type == para_Bullet) {
208 fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
209 } else if (p->type == para_NumberedList) {
210 fprintf(fp, ".IP \"");
211 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
212 fprintf(fp, "\"\n");
213 } else if (p->type == para_Description) {
214 /*
215 * Do nothing; the .xP for this paragraph is the .IP
216 * which has come before it in the DescribedThing.
217 */
218 } else if (p->type == para_BiblioCited) {
219 fprintf(fp, ".IP \"");
220 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
221 fprintf(fp, "\"\n");
222 }
223 man_text(fp, p->words, TRUE, 0);
224 break;
225
226 case para_DescribedThing:
227 fprintf(fp, ".IP \"");
228 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
229 fprintf(fp, "\"\n");
230 break;
231
232 case para_Rule:
233 /*
4b3c5afb 234 * This isn't terribly good. Anyone who wants to do better
235 * should feel free!
7136a6c7 236 */
4b3c5afb 237 fprintf(fp, ".PP\n----------------------------------------\n");
7136a6c7 238 break;
239
240 case para_LcontPush:
2614b01d 241 case para_QuotePush:
7136a6c7 242 fprintf(fp, ".RS\n");
243 break;
244 case para_LcontPop:
2614b01d 245 case para_QuotePop:
7136a6c7 246 fprintf(fp, ".RE\n");
247 break;
248 }
249
250 /*
251 * Tidy up.
252 */
253 fclose(fp);
4b3c5afb 254 man_conf_cleanup(conf);
7136a6c7 255}
256
257/*
258 * Convert a wide string into a string of chars. If `result' is
259 * non-NULL, mallocs the resulting string and stores a pointer to
260 * it in `*result'. If `result' is NULL, merely checks whether all
261 * characters in the string are feasible for the output character
262 * set.
263 *
264 * Return is nonzero if all characters are OK. If not all
265 * characters are OK but `result' is non-NULL, a result _will_
266 * still be generated!
267 *
268 * FIXME: Here is probably also a good place to do escaping sorts
269 * of things. I know I at least need to escape backslash, and full
270 * stops at the starts of words are probably trouble as well.
271 */
4b3c5afb 272static int man_convert(wchar_t *s, int maxlen,
273 char **result, int quote_props) {
7136a6c7 274 /*
275 * FIXME. Currently this is ISO8859-1 only.
276 */
277 int doing = (result != 0);
278 int ok = TRUE;
279 char *p = NULL;
280 int plen = 0, psize = 0;
281
4b3c5afb 282 if (maxlen <= 0)
283 maxlen = -1;
284
285 for (; *s && maxlen != 0; s++, maxlen--) {
7136a6c7 286 wchar_t c = *s;
287 char outc;
288
289 if ((c >= 32 && c <= 126) ||
290 (c >= 160 && c <= 255)) {
291 /* Char is OK. */
292 outc = (char)c;
293 } else {
294 /* Char is not OK. */
295 ok = FALSE;
296 outc = 0xBF; /* approximate the good old DEC `uh?' */
297 }
298 if (doing) {
299 if (plen+3 >= psize) {
300 psize = plen + 256;
301 p = resize(p, psize);
302 }
303 if (plen == 0 && (outc == '.' || outc == '\'') &&
304 (quote_props & QUOTE_INITCTRL)) {
305 /*
306 * Control character (. or ') at the start of a
307 * line. Quote it by putting \& (troff zero-width
308 * space) before it.
309 */
310 p[plen++] = '\\';
311 p[plen++] = '&';
312 } else if (outc == '\\') {
313 /*
314 * Quote backslashes by doubling them, always.
315 */
316 p[plen++] = '\\';
317 } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) {
318 /*
319 * Double quote within double quotes. Quote it by
320 * doubling.
321 */
322 p[plen++] = '"';
323 }
324 p[plen++] = outc;
325 }
326 }
327 if (doing) {
328 p = resize(p, plen+1);
329 p[plen] = '\0';
330 *result = p;
331 }
332 return ok;
333}
334
335static void man_rdaddwc(rdstringc *rs, word *text, word *end,
336 int quote_props) {
337 char *c;
338
339 for (; text && text != end; text = text->next) switch (text->type) {
340 case word_HyperLink:
341 case word_HyperEnd:
342 case word_UpperXref:
343 case word_LowerXref:
344 case word_XrefEnd:
345 case word_IndexRef:
346 break;
347
348 case word_Normal:
349 case word_Emph:
350 case word_Code:
351 case word_WeakCode:
352 case word_WhiteSpace:
353 case word_EmphSpace:
354 case word_CodeSpace:
355 case word_WkCodeSpace:
356 case word_Quote:
357 case word_EmphQuote:
358 case word_CodeQuote:
359 case word_WkCodeQuote:
360 assert(text->type != word_CodeQuote &&
361 text->type != word_WkCodeQuote);
362 if (towordstyle(text->type) == word_Emph &&
363 (attraux(text->aux) == attr_First ||
364 attraux(text->aux) == attr_Only))
365 rdaddsc(rs, "\\fI");
4b3c5afb 366 else if ((towordstyle(text->type) == word_Code ||
367 towordstyle(text->type) == word_WeakCode) &&
7136a6c7 368 (attraux(text->aux) == attr_First ||
369 attraux(text->aux) == attr_Only))
370 rdaddsc(rs, "\\fB");
371 if (removeattr(text->type) == word_Normal) {
372 if (rs->pos > 0)
373 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
4b3c5afb 374 if (man_convert(text->text, 0, &c, quote_props))
7136a6c7 375 rdaddsc(rs, c);
376 else
377 man_rdaddwc(rs, text->alt, NULL, quote_props);
378 sfree(c);
379 } else if (removeattr(text->type) == word_WhiteSpace) {
380 rdaddc(rs, ' ');
381 } else if (removeattr(text->type) == word_Quote) {
4b3c5afb 382 rdaddc(rs, '"');
383 if (quote_props & QUOTE_QUOTES)
384 rdaddc(rs, '"');
7136a6c7 385 }
386 if (towordstyle(text->type) == word_Emph &&
387 (attraux(text->aux) == attr_Last ||
388 attraux(text->aux) == attr_Only))
389 rdaddsc(rs, "\\fP");
4b3c5afb 390 else if ((towordstyle(text->type) == word_Code ||
391 towordstyle(text->type) == word_WeakCode) &&
7136a6c7 392 (attraux(text->aux) == attr_Last ||
393 attraux(text->aux) == attr_Only))
394 rdaddsc(rs, "\\fP");
395 break;
396 }
397}
398
399static void man_text(FILE *fp, word *text, int newline, int quote_props) {
400 rdstringc t = { 0, 0, NULL };
401
402 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL);
403 fprintf(fp, "%s", t.text);
404 sfree(t.text);
405 if (newline)
406 fputc('\n', fp);
407}
408
409static void man_codepara(FILE *fp, word *text) {
410 fprintf(fp, ".nf\n");
411 for (; text; text = text->next) if (text->type == word_WeakCode) {
412 char *c;
4b3c5afb 413 wchar_t *t, *e;
414 int quote_props = QUOTE_INITCTRL;
415
416 t = text->text;
417 if (text->next && text->next->type == word_Emph) {
418 e = text->next->text;
419 text = text->next;
420 } else
421 e = NULL;
422
423 while (e && *e && *t) {
424 int n;
425 int ec = *e;
426
427 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
428 if (ec == 'i')
429 fprintf(fp, "\\fI");
430 else if (ec == 'b')
431 fprintf(fp, "\\fB");
432 man_convert(t, n, &c, quote_props);
433 quote_props &= ~QUOTE_INITCTRL;
434 fprintf(fp, "%s", c);
435 sfree(c);
436 if (ec == 'i' || ec == 'b')
437 fprintf(fp, "\\fP");
438 t += n;
439 e += n;
440 }
441 man_convert(t, 0, &c, quote_props);
7136a6c7 442 fprintf(fp, "%s\n", c);
443 sfree(c);
444 }
445 fprintf(fp, ".fi\n");
446}