Support for \cfg{input-charset}. Input files can now be in ASCII,
[sgt/halibut] / bk_man.c
CommitLineData
7136a6c7 1/*
2 * man page backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
10static void man_text(FILE *, word *, int newline, int quote_props);
11static void man_codepara(FILE *, word *);
4b3c5afb 12static int man_convert(wchar_t *s, int maxlen,
13 char **result, int quote_props);
14
/*
 * Settings for the man page backend, filled in by man_configure()
 * and released by man_conf_cleanup().
 */
typedef struct manconfig {
    /*
     * Arguments for the .TH line (from `man-identity'), or NULL if
     * none were given. Stored as a run of strings that is walked
     * with uadv() until an empty string is reached.
     */
    wchar_t *th;
    /* Nonzero: prefix each heading with its kwtext, when it has one. */
    int headnumbers;
    /* Headings whose depth is below this value are not written. */
    int mindepth;
    /* Name of the output file; dynamically allocated. */
    char *filename;
} manconfig;
21
22static manconfig man_configure(paragraph *source) {
23 manconfig ret;
24
25 /*
26 * Defaults.
27 */
28 ret.th = NULL;
29 ret.headnumbers = FALSE;
30 ret.mindepth = 0;
50d6b4bd 31 ret.filename = dupstr("output.1");
4b3c5afb 32
33 for (; source; source = source->next) {
34 if (source->type == para_Config) {
35 if (!ustricmp(source->keyword, L"man-identity")) {
36 wchar_t *wp, *ep;
37
38 wp = uadv(source->keyword);
39 ep = wp;
40 while (*ep)
41 ep = uadv(ep);
50d6b4bd 42 sfree(ret.th);
4b3c5afb 43 ret.th = mknewa(wchar_t, ep - wp + 1);
44 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
45 } else if (!ustricmp(source->keyword, L"man-headnumbers")) {
46 ret.headnumbers = utob(uadv(source->keyword));
47 } else if (!ustricmp(source->keyword, L"man-mindepth")) {
48 ret.mindepth = utoi(uadv(source->keyword));
50d6b4bd 49 } else if (!ustricmp(source->keyword, L"man-filename")) {
50 sfree(ret.filename);
51 ret.filename = utoa_dup(uadv(source->keyword));
4b3c5afb 52 }
53 }
54 }
55
56 return ret;
57}
58
59static void man_conf_cleanup(manconfig cf)
60{
61 sfree(cf.th);
50d6b4bd 62 sfree(cf.filename);
4b3c5afb 63}
7136a6c7 64
ba9c1487 65paragraph *man_config_filename(char *filename)
66{
67 paragraph *p;
68 wchar_t *ufilename, *up;
69 int len;
70
71 p = mknew(paragraph);
72 memset(p, 0, sizeof(*p));
73 p->type = para_Config;
74 p->next = NULL;
75 p->fpos.filename = "<command line>";
76 p->fpos.line = p->fpos.col = -1;
77
78 ufilename = ufroma_dup(filename);
79 len = ustrlen(ufilename) + 2 + lenof(L"man-filename");
80 p->keyword = mknewa(wchar_t, len);
81 up = p->keyword;
82 ustrcpy(up, L"man-filename");
83 up = uadv(up);
84 ustrcpy(up, ufilename);
85 up = uadv(up);
86 *up = L'\0';
87 assert(up - p->keyword < len);
88 sfree(ufilename);
89
90 return p;
91}
92
/*
 * Bit flags for the `quote_props' argument taken by the text
 * conversion routines in this file.
 */
#define QUOTE_INITCTRL 0x01  /* quote initial . and ' on a line */
#define QUOTE_QUOTES   0x02  /* quote double quotes by doubling them */
95
96void man_backend(paragraph *sourceform, keywordlist *keywords,
43341922 97 indexdata *idx, void *unused) {
7136a6c7 98 paragraph *p;
99 FILE *fp;
4b3c5afb 100 manconfig conf;
7136a6c7 101
43341922 102 IGNORE(unused);
103 IGNORE(keywords);
104 IGNORE(idx);
7136a6c7 105
4b3c5afb 106 conf = man_configure(sourceform);
107
7136a6c7 108 /*
50d6b4bd 109 * Open the output file.
7136a6c7 110 */
50d6b4bd 111 fp = fopen(conf.filename, "w");
7136a6c7 112 if (!fp) {
50d6b4bd 113 error(err_cantopenw, conf.filename);
7136a6c7 114 return;
115 }
116
117 /* Do the version ID */
118 for (p = sourceform; p; p = p->next)
119 if (p->type == para_VersionID) {
120 fprintf(fp, ".\\\" ");
121 man_text(fp, p->words, TRUE, 0);
122 }
123
4b3c5afb 124 /* .TH name-of-program manual-section */
22905f72 125 fprintf(fp, ".TH");
126 if (conf.th && *conf.th) {
4b3c5afb 127 char *c;
22905f72 128 wchar_t *wp;
129
130 for (wp = conf.th; *wp; wp = uadv(wp)) {
131 fputs(" \"", fp);
132 man_convert(wp, 0, &c, QUOTE_QUOTES);
133 fputs(c, fp);
134 sfree(c);
135 fputc('"', fp);
4b3c5afb 136 }
137 }
22905f72 138 fputc('\n', fp);
7136a6c7 139
140 fprintf(fp, ".UC\n");
141
7136a6c7 142 for (p = sourceform; p; p = p->next) switch (p->type) {
143 /*
144 * Things we ignore because we've already processed them or
145 * aren't going to touch them in this pass.
146 */
147 case para_IM:
148 case para_BR:
149 case para_Biblio: /* only touch BiblioCited */
150 case para_VersionID:
7136a6c7 151 case para_NoCite:
152 case para_Title:
153 break;
154
155 /*
156 * Headings.
157 */
158 case para_Chapter:
159 case para_Appendix:
160 case para_UnnumberedChapter:
161 case para_Heading:
162 case para_Subsect:
8902e0ed 163
4b3c5afb 164 {
165 int depth;
166 if (p->type == para_Subsect)
167 depth = p->aux + 2;
168 else if (p->type == para_Heading)
169 depth = 1;
170 else
171 depth = 0;
172 if (depth >= conf.mindepth) {
173 fprintf(fp, ".SH \"");
174 if (conf.headnumbers && p->kwtext) {
175 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
176 fprintf(fp, " ");
177 }
178 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
179 fprintf(fp, "\"\n");
180 }
181 break;
182 }
7136a6c7 183
184 /*
185 * Code paragraphs.
186 */
187 case para_Code:
188 fprintf(fp, ".PP\n");
189 man_codepara(fp, p->words);
190 break;
191
192 /*
193 * Normal paragraphs.
194 */
195 case para_Normal:
9057a0a8 196 case para_Copyright:
7136a6c7 197 fprintf(fp, ".PP\n");
198 man_text(fp, p->words, TRUE, 0);
199 break;
200
201 /*
202 * List paragraphs.
203 */
204 case para_Description:
205 case para_BiblioCited:
206 case para_Bullet:
207 case para_NumberedList:
208 if (p->type == para_Bullet) {
209 fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
210 } else if (p->type == para_NumberedList) {
211 fprintf(fp, ".IP \"");
212 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
213 fprintf(fp, "\"\n");
214 } else if (p->type == para_Description) {
215 /*
216 * Do nothing; the .xP for this paragraph is the .IP
217 * which has come before it in the DescribedThing.
218 */
219 } else if (p->type == para_BiblioCited) {
220 fprintf(fp, ".IP \"");
221 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
222 fprintf(fp, "\"\n");
223 }
224 man_text(fp, p->words, TRUE, 0);
225 break;
226
227 case para_DescribedThing:
228 fprintf(fp, ".IP \"");
229 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
230 fprintf(fp, "\"\n");
231 break;
232
233 case para_Rule:
234 /*
4b3c5afb 235 * This isn't terribly good. Anyone who wants to do better
236 * should feel free!
7136a6c7 237 */
4b3c5afb 238 fprintf(fp, ".PP\n----------------------------------------\n");
7136a6c7 239 break;
240
241 case para_LcontPush:
2614b01d 242 case para_QuotePush:
7136a6c7 243 fprintf(fp, ".RS\n");
244 break;
245 case para_LcontPop:
2614b01d 246 case para_QuotePop:
7136a6c7 247 fprintf(fp, ".RE\n");
248 break;
249 }
250
251 /*
252 * Tidy up.
253 */
254 fclose(fp);
4b3c5afb 255 man_conf_cleanup(conf);
7136a6c7 256}
257
258/*
259 * Convert a wide string into a string of chars. If `result' is
260 * non-NULL, mallocs the resulting string and stores a pointer to
261 * it in `*result'. If `result' is NULL, merely checks whether all
262 * characters in the string are feasible for the output character
263 * set.
264 *
265 * Return is nonzero if all characters are OK. If not all
266 * characters are OK but `result' is non-NULL, a result _will_
267 * still be generated!
268 *
269 * FIXME: Here is probably also a good place to do escaping sorts
270 * of things. I know I at least need to escape backslash, and full
271 * stops at the starts of words are probably trouble as well.
272 */
4b3c5afb 273static int man_convert(wchar_t *s, int maxlen,
274 char **result, int quote_props) {
7136a6c7 275 /*
276 * FIXME. Currently this is ISO8859-1 only.
277 */
278 int doing = (result != 0);
279 int ok = TRUE;
280 char *p = NULL;
281 int plen = 0, psize = 0;
282
4b3c5afb 283 if (maxlen <= 0)
284 maxlen = -1;
285
286 for (; *s && maxlen != 0; s++, maxlen--) {
7136a6c7 287 wchar_t c = *s;
288 char outc;
289
290 if ((c >= 32 && c <= 126) ||
291 (c >= 160 && c <= 255)) {
292 /* Char is OK. */
293 outc = (char)c;
294 } else {
295 /* Char is not OK. */
296 ok = FALSE;
297 outc = 0xBF; /* approximate the good old DEC `uh?' */
298 }
299 if (doing) {
300 if (plen+3 >= psize) {
301 psize = plen + 256;
302 p = resize(p, psize);
303 }
304 if (plen == 0 && (outc == '.' || outc == '\'') &&
305 (quote_props & QUOTE_INITCTRL)) {
306 /*
307 * Control character (. or ') at the start of a
308 * line. Quote it by putting \& (troff zero-width
309 * space) before it.
310 */
311 p[plen++] = '\\';
312 p[plen++] = '&';
313 } else if (outc == '\\') {
314 /*
315 * Quote backslashes by doubling them, always.
316 */
317 p[plen++] = '\\';
318 } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) {
319 /*
320 * Double quote within double quotes. Quote it by
321 * doubling.
322 */
323 p[plen++] = '"';
324 }
325 p[plen++] = outc;
326 }
327 }
328 if (doing) {
329 p = resize(p, plen+1);
330 p[plen] = '\0';
331 *result = p;
332 }
333 return ok;
334}
335
336static void man_rdaddwc(rdstringc *rs, word *text, word *end,
337 int quote_props) {
338 char *c;
339
340 for (; text && text != end; text = text->next) switch (text->type) {
341 case word_HyperLink:
342 case word_HyperEnd:
343 case word_UpperXref:
344 case word_LowerXref:
345 case word_XrefEnd:
346 case word_IndexRef:
347 break;
348
349 case word_Normal:
350 case word_Emph:
351 case word_Code:
352 case word_WeakCode:
353 case word_WhiteSpace:
354 case word_EmphSpace:
355 case word_CodeSpace:
356 case word_WkCodeSpace:
357 case word_Quote:
358 case word_EmphQuote:
359 case word_CodeQuote:
360 case word_WkCodeQuote:
361 assert(text->type != word_CodeQuote &&
362 text->type != word_WkCodeQuote);
363 if (towordstyle(text->type) == word_Emph &&
364 (attraux(text->aux) == attr_First ||
365 attraux(text->aux) == attr_Only))
366 rdaddsc(rs, "\\fI");
4b3c5afb 367 else if ((towordstyle(text->type) == word_Code ||
368 towordstyle(text->type) == word_WeakCode) &&
7136a6c7 369 (attraux(text->aux) == attr_First ||
370 attraux(text->aux) == attr_Only))
371 rdaddsc(rs, "\\fB");
372 if (removeattr(text->type) == word_Normal) {
373 if (rs->pos > 0)
374 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
12efc259 375 if (man_convert(text->text, 0, &c, quote_props) || !text->alt)
7136a6c7 376 rdaddsc(rs, c);
377 else
378 man_rdaddwc(rs, text->alt, NULL, quote_props);
379 sfree(c);
380 } else if (removeattr(text->type) == word_WhiteSpace) {
381 rdaddc(rs, ' ');
382 } else if (removeattr(text->type) == word_Quote) {
4b3c5afb 383 rdaddc(rs, '"');
384 if (quote_props & QUOTE_QUOTES)
385 rdaddc(rs, '"');
7136a6c7 386 }
387 if (towordstyle(text->type) == word_Emph &&
388 (attraux(text->aux) == attr_Last ||
389 attraux(text->aux) == attr_Only))
390 rdaddsc(rs, "\\fP");
4b3c5afb 391 else if ((towordstyle(text->type) == word_Code ||
392 towordstyle(text->type) == word_WeakCode) &&
7136a6c7 393 (attraux(text->aux) == attr_Last ||
394 attraux(text->aux) == attr_Only))
395 rdaddsc(rs, "\\fP");
396 break;
397 }
398}
399
400static void man_text(FILE *fp, word *text, int newline, int quote_props) {
401 rdstringc t = { 0, 0, NULL };
402
403 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL);
404 fprintf(fp, "%s", t.text);
405 sfree(t.text);
406 if (newline)
407 fputc('\n', fp);
408}
409
410static void man_codepara(FILE *fp, word *text) {
411 fprintf(fp, ".nf\n");
412 for (; text; text = text->next) if (text->type == word_WeakCode) {
413 char *c;
4b3c5afb 414 wchar_t *t, *e;
415 int quote_props = QUOTE_INITCTRL;
416
417 t = text->text;
418 if (text->next && text->next->type == word_Emph) {
419 e = text->next->text;
420 text = text->next;
421 } else
422 e = NULL;
423
424 while (e && *e && *t) {
425 int n;
426 int ec = *e;
427
428 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
429 if (ec == 'i')
430 fprintf(fp, "\\fI");
431 else if (ec == 'b')
432 fprintf(fp, "\\fB");
433 man_convert(t, n, &c, quote_props);
434 quote_props &= ~QUOTE_INITCTRL;
435 fprintf(fp, "%s", c);
436 sfree(c);
437 if (ec == 'i' || ec == 'b')
438 fprintf(fp, "\\fP");
439 t += n;
440 e += n;
441 }
442 man_convert(t, 0, &c, quote_props);
7136a6c7 443 fprintf(fp, "%s\n", c);
444 sfree(c);
445 }
446 fprintf(fp, ".fi\n");
447}