Cleanups to complete the man page backend. Also, an additional new
[sgt/halibut] / bk_man.c
1 /*
2 * man page backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include "halibut.h"
9
10 static void man_text(FILE *, word *, int newline, int quote_props);
11 static void man_codepara(FILE *, word *);
12 static int man_convert(wchar_t *s, int maxlen,
13 char **result, int quote_props);
14
/*
 * Settings for the man page backend, collected from para_Config
 * paragraphs by man_configure().
 */
typedef struct {
    /*
     * Arguments of `man-identity': a sequence of NUL-terminated wide
     * strings ended by an empty string, emitted as the operands of
     * .TH. NULL if the directive was never seen.
     */
    wchar_t *th;

    /*
     * `man-headnumbers': if true, headings are prefixed with their
     * keyword text (p->kwtext) when they have one.
     */
    int headnumbers;

    /*
     * `man-mindepth': headings whose depth is below this value are
     * not emitted at all.
     */
    int mindepth;
} manconfig;
20
21 static manconfig man_configure(paragraph *source) {
22 manconfig ret;
23
24 /*
25 * Defaults.
26 */
27 ret.th = NULL;
28 ret.headnumbers = FALSE;
29 ret.mindepth = 0;
30
31 for (; source; source = source->next) {
32 if (source->type == para_Config) {
33 if (!ustricmp(source->keyword, L"man-identity")) {
34 wchar_t *wp, *ep;
35
36 wp = uadv(source->keyword);
37 ep = wp;
38 while (*ep)
39 ep = uadv(ep);
40 ret.th = mknewa(wchar_t, ep - wp + 1);
41 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
42 } else if (!ustricmp(source->keyword, L"man-headnumbers")) {
43 ret.headnumbers = utob(uadv(source->keyword));
44 } else if (!ustricmp(source->keyword, L"man-mindepth")) {
45 ret.mindepth = utoi(uadv(source->keyword));
46 }
47 }
48 }
49
50 return ret;
51 }
52
53 static void man_conf_cleanup(manconfig cf)
54 {
55 sfree(cf.th);
56 }
57
/*
 * Bit flags for the `quote_props' argument taken by man_convert(),
 * man_rdaddwc() and man_text(). They may be ORed together.
 */
#define QUOTE_INITCTRL 0x01   /* quote initial . and ' on a line */
#define QUOTE_QUOTES   0x02   /* quote double quotes by doubling them */
60
61 void man_backend(paragraph *sourceform, keywordlist *keywords,
62 indexdata *idx) {
63 paragraph *p;
64 FILE *fp;
65 char const *sep;
66 manconfig conf;
67
68 IGNORE(keywords); /* we don't happen to need this */
69 IGNORE(idx); /* or this */
70
71 conf = man_configure(sourceform);
72
73 /*
74 * Determine the output file name, and open the output file
75 *
76 * FIXME: want configurable output file names here. For the
77 * moment, we'll just call it `output.1'.
78 */
79 fp = fopen("output.1", "w");
80 if (!fp) {
81 error(err_cantopenw, "output.1");
82 return;
83 }
84
85 /* Do the version ID */
86 for (p = sourceform; p; p = p->next)
87 if (p->type == para_VersionID) {
88 fprintf(fp, ".\\\" ");
89 man_text(fp, p->words, TRUE, 0);
90 }
91
92 /* .TH name-of-program manual-section */
93 {
94 char *c;
95 if (conf.th && *conf.th) {
96 wchar_t *wp;
97 fprintf(fp, ".TH");
98
99 for (wp = conf.th; *wp; wp = uadv(wp)) {
100 fputs(" \"", fp);
101 man_convert(wp, 0, &c, QUOTE_QUOTES);
102 fputs(c, fp);
103 sfree(c);
104 fputc('"', fp);
105 }
106 fputc('\n', fp);
107 }
108 }
109
110 fprintf(fp, ".UC\n");
111
112 /* Do the preamble and copyright */
113 sep = "";
114 for (p = sourceform; p; p = p->next)
115 if (p->type == para_Preamble) {
116 fprintf(fp, "%s", sep);
117 man_text(fp, p->words, TRUE, 0);
118 sep = "\n";
119 }
120 for (p = sourceform; p; p = p->next)
121 if (p->type == para_Copyright) {
122 fprintf(fp, "%s", sep);
123 man_text(fp, p->words, TRUE, 0);
124 sep = "\n";
125 }
126
127 for (p = sourceform; p; p = p->next) switch (p->type) {
128 /*
129 * Things we ignore because we've already processed them or
130 * aren't going to touch them in this pass.
131 */
132 case para_IM:
133 case para_BR:
134 case para_Biblio: /* only touch BiblioCited */
135 case para_VersionID:
136 case para_Copyright:
137 case para_Preamble:
138 case para_NoCite:
139 case para_Title:
140 break;
141
142 /*
143 * Headings.
144 */
145 case para_Chapter:
146 case para_Appendix:
147 case para_UnnumberedChapter:
148 case para_Heading:
149 case para_Subsect:
150 {
151 int depth;
152 if (p->type == para_Subsect)
153 depth = p->aux + 2;
154 else if (p->type == para_Heading)
155 depth = 1;
156 else
157 depth = 0;
158 if (depth >= conf.mindepth) {
159 fprintf(fp, ".SH \"");
160 if (conf.headnumbers && p->kwtext) {
161 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
162 fprintf(fp, " ");
163 }
164 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
165 fprintf(fp, "\"\n");
166 }
167 break;
168 }
169
170 /*
171 * Code paragraphs.
172 */
173 case para_Code:
174 fprintf(fp, ".PP\n");
175 man_codepara(fp, p->words);
176 break;
177
178 /*
179 * Normal paragraphs.
180 */
181 case para_Normal:
182 fprintf(fp, ".PP\n");
183 man_text(fp, p->words, TRUE, 0);
184 break;
185
186 /*
187 * List paragraphs.
188 */
189 case para_Description:
190 case para_BiblioCited:
191 case para_Bullet:
192 case para_NumberedList:
193 if (p->type == para_Bullet) {
194 fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
195 } else if (p->type == para_NumberedList) {
196 fprintf(fp, ".IP \"");
197 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
198 fprintf(fp, "\"\n");
199 } else if (p->type == para_Description) {
200 /*
201 * Do nothing; the .xP for this paragraph is the .IP
202 * which has come before it in the DescribedThing.
203 */
204 } else if (p->type == para_BiblioCited) {
205 fprintf(fp, ".IP \"");
206 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
207 fprintf(fp, "\"\n");
208 }
209 man_text(fp, p->words, TRUE, 0);
210 break;
211
212 case para_DescribedThing:
213 fprintf(fp, ".IP \"");
214 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
215 fprintf(fp, "\"\n");
216 break;
217
218 case para_Rule:
219 /*
220 * This isn't terribly good. Anyone who wants to do better
221 * should feel free!
222 */
223 fprintf(fp, ".PP\n----------------------------------------\n");
224 break;
225
226 case para_LcontPush:
227 fprintf(fp, ".RS\n");
228 break;
229 case para_LcontPop:
230 fprintf(fp, ".RE\n");
231 break;
232 }
233
234 /*
235 * Tidy up.
236 */
237 fclose(fp);
238 man_conf_cleanup(conf);
239 }
240
241 /*
242 * Convert a wide string into a string of chars. If `result' is
243 * non-NULL, mallocs the resulting string and stores a pointer to
244 * it in `*result'. If `result' is NULL, merely checks whether all
245 * characters in the string are feasible for the output character
246 * set.
247 *
248 * Return is nonzero if all characters are OK. If not all
249 * characters are OK but `result' is non-NULL, a result _will_
250 * still be generated!
251 *
252 * FIXME: Here is probably also a good place to do escaping sorts
253 * of things. I know I at least need to escape backslash, and full
254 * stops at the starts of words are probably trouble as well.
255 */
256 static int man_convert(wchar_t *s, int maxlen,
257 char **result, int quote_props) {
258 /*
259 * FIXME. Currently this is ISO8859-1 only.
260 */
261 int doing = (result != 0);
262 int ok = TRUE;
263 char *p = NULL;
264 int plen = 0, psize = 0;
265
266 if (maxlen <= 0)
267 maxlen = -1;
268
269 for (; *s && maxlen != 0; s++, maxlen--) {
270 wchar_t c = *s;
271 char outc;
272
273 if ((c >= 32 && c <= 126) ||
274 (c >= 160 && c <= 255)) {
275 /* Char is OK. */
276 outc = (char)c;
277 } else {
278 /* Char is not OK. */
279 ok = FALSE;
280 outc = 0xBF; /* approximate the good old DEC `uh?' */
281 }
282 if (doing) {
283 if (plen+3 >= psize) {
284 psize = plen + 256;
285 p = resize(p, psize);
286 }
287 if (plen == 0 && (outc == '.' || outc == '\'') &&
288 (quote_props & QUOTE_INITCTRL)) {
289 /*
290 * Control character (. or ') at the start of a
291 * line. Quote it by putting \& (troff zero-width
292 * space) before it.
293 */
294 p[plen++] = '\\';
295 p[plen++] = '&';
296 } else if (outc == '\\') {
297 /*
298 * Quote backslashes by doubling them, always.
299 */
300 p[plen++] = '\\';
301 } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) {
302 /*
303 * Double quote within double quotes. Quote it by
304 * doubling.
305 */
306 p[plen++] = '"';
307 }
308 p[plen++] = outc;
309 }
310 }
311 if (doing) {
312 p = resize(p, plen+1);
313 p[plen] = '\0';
314 *result = p;
315 }
316 return ok;
317 }
318
319 static void man_rdaddwc(rdstringc *rs, word *text, word *end,
320 int quote_props) {
321 char *c;
322
323 for (; text && text != end; text = text->next) switch (text->type) {
324 case word_HyperLink:
325 case word_HyperEnd:
326 case word_UpperXref:
327 case word_LowerXref:
328 case word_XrefEnd:
329 case word_IndexRef:
330 break;
331
332 case word_Normal:
333 case word_Emph:
334 case word_Code:
335 case word_WeakCode:
336 case word_WhiteSpace:
337 case word_EmphSpace:
338 case word_CodeSpace:
339 case word_WkCodeSpace:
340 case word_Quote:
341 case word_EmphQuote:
342 case word_CodeQuote:
343 case word_WkCodeQuote:
344 assert(text->type != word_CodeQuote &&
345 text->type != word_WkCodeQuote);
346 if (towordstyle(text->type) == word_Emph &&
347 (attraux(text->aux) == attr_First ||
348 attraux(text->aux) == attr_Only))
349 rdaddsc(rs, "\\fI");
350 else if ((towordstyle(text->type) == word_Code ||
351 towordstyle(text->type) == word_WeakCode) &&
352 (attraux(text->aux) == attr_First ||
353 attraux(text->aux) == attr_Only))
354 rdaddsc(rs, "\\fB");
355 if (removeattr(text->type) == word_Normal) {
356 if (rs->pos > 0)
357 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
358 if (man_convert(text->text, 0, &c, quote_props))
359 rdaddsc(rs, c);
360 else
361 man_rdaddwc(rs, text->alt, NULL, quote_props);
362 sfree(c);
363 } else if (removeattr(text->type) == word_WhiteSpace) {
364 rdaddc(rs, ' ');
365 } else if (removeattr(text->type) == word_Quote) {
366 rdaddc(rs, '"');
367 if (quote_props & QUOTE_QUOTES)
368 rdaddc(rs, '"');
369 }
370 if (towordstyle(text->type) == word_Emph &&
371 (attraux(text->aux) == attr_Last ||
372 attraux(text->aux) == attr_Only))
373 rdaddsc(rs, "\\fP");
374 else if ((towordstyle(text->type) == word_Code ||
375 towordstyle(text->type) == word_WeakCode) &&
376 (attraux(text->aux) == attr_Last ||
377 attraux(text->aux) == attr_Only))
378 rdaddsc(rs, "\\fP");
379 break;
380 }
381 }
382
383 static void man_text(FILE *fp, word *text, int newline, int quote_props) {
384 rdstringc t = { 0, 0, NULL };
385
386 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL);
387 fprintf(fp, "%s", t.text);
388 sfree(t.text);
389 if (newline)
390 fputc('\n', fp);
391 }
392
393 static void man_codepara(FILE *fp, word *text) {
394 fprintf(fp, ".nf\n");
395 for (; text; text = text->next) if (text->type == word_WeakCode) {
396 char *c;
397 wchar_t *t, *e;
398 int quote_props = QUOTE_INITCTRL;
399
400 t = text->text;
401 if (text->next && text->next->type == word_Emph) {
402 e = text->next->text;
403 text = text->next;
404 } else
405 e = NULL;
406
407 while (e && *e && *t) {
408 int n;
409 int ec = *e;
410
411 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
412 if (ec == 'i')
413 fprintf(fp, "\\fI");
414 else if (ec == 'b')
415 fprintf(fp, "\\fB");
416 man_convert(t, n, &c, quote_props);
417 quote_props &= ~QUOTE_INITCTRL;
418 fprintf(fp, "%s", c);
419 sfree(c);
420 if (ec == 'i' || ec == 'b')
421 fprintf(fp, "\\fP");
422 t += n;
423 e += n;
424 }
425 man_convert(t, 0, &c, quote_props);
426 fprintf(fp, "%s\n", c);
427 sfree(c);
428 }
429 fprintf(fp, ".fi\n");
430 }