Initial work on PS and PDF output. Because these two backends share
[sgt/halibut] / bk_man.c
1 /*
2 * man page backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include "halibut.h"
9
10 static void man_text(FILE *, word *, int newline, int quote_props);
11 static void man_codepara(FILE *, word *);
12 static int man_convert(wchar_t *s, int maxlen,
13 char **result, int quote_props);
14
/*
 * Settings for the man page backend, gathered from the para_Config
 * paragraphs of the source document by man_configure().
 */
typedef struct {
    /*
     * Arguments for the .TH line, taken from "man-identity": a
     * sequence of NUL-terminated wide strings ended by an empty
     * string. NULL if no identity was configured.
     */
    wchar_t *th;
    /* Nonzero to prefix each heading with its p->kwtext. */
    int headnumbers;
    /* Headings whose depth is below this value are not emitted. */
    int mindepth;
    /* Name of the output file; dynamically allocated. */
    char *filename;
} manconfig;
21
22 static manconfig man_configure(paragraph *source) {
23 manconfig ret;
24
25 /*
26 * Defaults.
27 */
28 ret.th = NULL;
29 ret.headnumbers = FALSE;
30 ret.mindepth = 0;
31 ret.filename = dupstr("output.1");
32
33 for (; source; source = source->next) {
34 if (source->type == para_Config) {
35 if (!ustricmp(source->keyword, L"man-identity")) {
36 wchar_t *wp, *ep;
37
38 wp = uadv(source->keyword);
39 ep = wp;
40 while (*ep)
41 ep = uadv(ep);
42 sfree(ret.th);
43 ret.th = mknewa(wchar_t, ep - wp + 1);
44 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
45 } else if (!ustricmp(source->keyword, L"man-headnumbers")) {
46 ret.headnumbers = utob(uadv(source->keyword));
47 } else if (!ustricmp(source->keyword, L"man-mindepth")) {
48 ret.mindepth = utoi(uadv(source->keyword));
49 } else if (!ustricmp(source->keyword, L"man-filename")) {
50 sfree(ret.filename);
51 ret.filename = utoa_dup(uadv(source->keyword));
52 }
53 }
54 }
55
56 return ret;
57 }
58
59 static void man_conf_cleanup(manconfig cf)
60 {
61 sfree(cf.th);
62 sfree(cf.filename);
63 }
64
65 paragraph *man_config_filename(char *filename)
66 {
67 paragraph *p;
68 wchar_t *ufilename, *up;
69 int len;
70
71 p = mknew(paragraph);
72 memset(p, 0, sizeof(*p));
73 p->type = para_Config;
74 p->next = NULL;
75 p->fpos.filename = "<command line>";
76 p->fpos.line = p->fpos.col = -1;
77
78 ufilename = ufroma_dup(filename);
79 len = ustrlen(ufilename) + 2 + lenof(L"man-filename");
80 p->keyword = mknewa(wchar_t, len);
81 up = p->keyword;
82 ustrcpy(up, L"man-filename");
83 up = uadv(up);
84 ustrcpy(up, ufilename);
85 up = uadv(up);
86 *up = L'\0';
87 assert(up - p->keyword < len);
88 sfree(ufilename);
89
90 return p;
91 }
92
/*
 * Bit flags for the `quote_props' argument of the conversion helpers.
 */
#define QUOTE_INITCTRL (1 << 0)  /* quote initial . and ' on a line */
#define QUOTE_QUOTES   (1 << 1)  /* quote double quotes by doubling them */
95
96 void man_backend(paragraph *sourceform, keywordlist *keywords,
97 indexdata *idx, void *unused) {
98 paragraph *p;
99 FILE *fp;
100 manconfig conf;
101
102 IGNORE(unused);
103 IGNORE(keywords);
104 IGNORE(idx);
105
106 conf = man_configure(sourceform);
107
108 /*
109 * Open the output file.
110 */
111 fp = fopen(conf.filename, "w");
112 if (!fp) {
113 error(err_cantopenw, conf.filename);
114 return;
115 }
116
117 /* Do the version ID */
118 for (p = sourceform; p; p = p->next)
119 if (p->type == para_VersionID) {
120 fprintf(fp, ".\\\" ");
121 man_text(fp, p->words, TRUE, 0);
122 }
123
124 /* .TH name-of-program manual-section */
125 fprintf(fp, ".TH");
126 if (conf.th && *conf.th) {
127 char *c;
128 wchar_t *wp;
129
130 for (wp = conf.th; *wp; wp = uadv(wp)) {
131 fputs(" \"", fp);
132 man_convert(wp, 0, &c, QUOTE_QUOTES);
133 fputs(c, fp);
134 sfree(c);
135 fputc('"', fp);
136 }
137 }
138 fputc('\n', fp);
139
140 fprintf(fp, ".UC\n");
141
142 for (p = sourceform; p; p = p->next) switch (p->type) {
143 /*
144 * Things we ignore because we've already processed them or
145 * aren't going to touch them in this pass.
146 */
147 case para_IM:
148 case para_BR:
149 case para_Biblio: /* only touch BiblioCited */
150 case para_VersionID:
151 case para_NoCite:
152 case para_Title:
153 break;
154
155 /*
156 * Headings.
157 */
158 case para_Chapter:
159 case para_Appendix:
160 case para_UnnumberedChapter:
161 case para_Heading:
162 case para_Subsect:
163
164 {
165 int depth;
166 if (p->type == para_Subsect)
167 depth = p->aux + 2;
168 else if (p->type == para_Heading)
169 depth = 1;
170 else
171 depth = 0;
172 if (depth >= conf.mindepth) {
173 fprintf(fp, ".SH \"");
174 if (conf.headnumbers && p->kwtext) {
175 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
176 fprintf(fp, " ");
177 }
178 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
179 fprintf(fp, "\"\n");
180 }
181 break;
182 }
183
184 /*
185 * Code paragraphs.
186 */
187 case para_Code:
188 fprintf(fp, ".PP\n");
189 man_codepara(fp, p->words);
190 break;
191
192 /*
193 * Normal paragraphs.
194 */
195 case para_Normal:
196 case para_Copyright:
197 fprintf(fp, ".PP\n");
198 man_text(fp, p->words, TRUE, 0);
199 break;
200
201 /*
202 * List paragraphs.
203 */
204 case para_Description:
205 case para_BiblioCited:
206 case para_Bullet:
207 case para_NumberedList:
208 if (p->type == para_Bullet) {
209 fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
210 } else if (p->type == para_NumberedList) {
211 fprintf(fp, ".IP \"");
212 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
213 fprintf(fp, "\"\n");
214 } else if (p->type == para_Description) {
215 /*
216 * Do nothing; the .xP for this paragraph is the .IP
217 * which has come before it in the DescribedThing.
218 */
219 } else if (p->type == para_BiblioCited) {
220 fprintf(fp, ".IP \"");
221 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
222 fprintf(fp, "\"\n");
223 }
224 man_text(fp, p->words, TRUE, 0);
225 break;
226
227 case para_DescribedThing:
228 fprintf(fp, ".IP \"");
229 man_text(fp, p->words, FALSE, QUOTE_QUOTES);
230 fprintf(fp, "\"\n");
231 break;
232
233 case para_Rule:
234 /*
235 * This isn't terribly good. Anyone who wants to do better
236 * should feel free!
237 */
238 fprintf(fp, ".PP\n----------------------------------------\n");
239 break;
240
241 case para_LcontPush:
242 case para_QuotePush:
243 fprintf(fp, ".RS\n");
244 break;
245 case para_LcontPop:
246 case para_QuotePop:
247 fprintf(fp, ".RE\n");
248 break;
249 }
250
251 /*
252 * Tidy up.
253 */
254 fclose(fp);
255 man_conf_cleanup(conf);
256 }
257
258 /*
259 * Convert a wide string into a string of chars. If `result' is
260 * non-NULL, mallocs the resulting string and stores a pointer to
261 * it in `*result'. If `result' is NULL, merely checks whether all
262 * characters in the string are feasible for the output character
263 * set.
264 *
265 * Return is nonzero if all characters are OK. If not all
266 * characters are OK but `result' is non-NULL, a result _will_
267 * still be generated!
268 *
269 * FIXME: Here is probably also a good place to do escaping sorts
270 * of things. I know I at least need to escape backslash, and full
271 * stops at the starts of words are probably trouble as well.
272 */
273 static int man_convert(wchar_t *s, int maxlen,
274 char **result, int quote_props) {
275 /*
276 * FIXME. Currently this is ISO8859-1 only.
277 */
278 int doing = (result != 0);
279 int ok = TRUE;
280 char *p = NULL;
281 int plen = 0, psize = 0;
282
283 if (maxlen <= 0)
284 maxlen = -1;
285
286 for (; *s && maxlen != 0; s++, maxlen--) {
287 wchar_t c = *s;
288 char outc;
289
290 if ((c >= 32 && c <= 126) ||
291 (c >= 160 && c <= 255)) {
292 /* Char is OK. */
293 outc = (char)c;
294 } else {
295 /* Char is not OK. */
296 ok = FALSE;
297 outc = 0xBF; /* approximate the good old DEC `uh?' */
298 }
299 if (doing) {
300 if (plen+3 >= psize) {
301 psize = plen + 256;
302 p = resize(p, psize);
303 }
304 if (plen == 0 && (outc == '.' || outc == '\'') &&
305 (quote_props & QUOTE_INITCTRL)) {
306 /*
307 * Control character (. or ') at the start of a
308 * line. Quote it by putting \& (troff zero-width
309 * space) before it.
310 */
311 p[plen++] = '\\';
312 p[plen++] = '&';
313 } else if (outc == '\\') {
314 /*
315 * Quote backslashes by doubling them, always.
316 */
317 p[plen++] = '\\';
318 } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) {
319 /*
320 * Double quote within double quotes. Quote it by
321 * doubling.
322 */
323 p[plen++] = '"';
324 }
325 p[plen++] = outc;
326 }
327 }
328 if (doing) {
329 p = resize(p, plen+1);
330 p[plen] = '\0';
331 *result = p;
332 }
333 return ok;
334 }
335
336 static void man_rdaddwc(rdstringc *rs, word *text, word *end,
337 int quote_props) {
338 char *c;
339
340 for (; text && text != end; text = text->next) switch (text->type) {
341 case word_HyperLink:
342 case word_HyperEnd:
343 case word_UpperXref:
344 case word_LowerXref:
345 case word_XrefEnd:
346 case word_IndexRef:
347 break;
348
349 case word_Normal:
350 case word_Emph:
351 case word_Code:
352 case word_WeakCode:
353 case word_WhiteSpace:
354 case word_EmphSpace:
355 case word_CodeSpace:
356 case word_WkCodeSpace:
357 case word_Quote:
358 case word_EmphQuote:
359 case word_CodeQuote:
360 case word_WkCodeQuote:
361 assert(text->type != word_CodeQuote &&
362 text->type != word_WkCodeQuote);
363 if (towordstyle(text->type) == word_Emph &&
364 (attraux(text->aux) == attr_First ||
365 attraux(text->aux) == attr_Only))
366 rdaddsc(rs, "\\fI");
367 else if ((towordstyle(text->type) == word_Code ||
368 towordstyle(text->type) == word_WeakCode) &&
369 (attraux(text->aux) == attr_First ||
370 attraux(text->aux) == attr_Only))
371 rdaddsc(rs, "\\fB");
372 if (removeattr(text->type) == word_Normal) {
373 if (rs->pos > 0)
374 quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
375 if (man_convert(text->text, 0, &c, quote_props))
376 rdaddsc(rs, c);
377 else
378 man_rdaddwc(rs, text->alt, NULL, quote_props);
379 sfree(c);
380 } else if (removeattr(text->type) == word_WhiteSpace) {
381 rdaddc(rs, ' ');
382 } else if (removeattr(text->type) == word_Quote) {
383 rdaddc(rs, '"');
384 if (quote_props & QUOTE_QUOTES)
385 rdaddc(rs, '"');
386 }
387 if (towordstyle(text->type) == word_Emph &&
388 (attraux(text->aux) == attr_Last ||
389 attraux(text->aux) == attr_Only))
390 rdaddsc(rs, "\\fP");
391 else if ((towordstyle(text->type) == word_Code ||
392 towordstyle(text->type) == word_WeakCode) &&
393 (attraux(text->aux) == attr_Last ||
394 attraux(text->aux) == attr_Only))
395 rdaddsc(rs, "\\fP");
396 break;
397 }
398 }
399
400 static void man_text(FILE *fp, word *text, int newline, int quote_props) {
401 rdstringc t = { 0, 0, NULL };
402
403 man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL);
404 fprintf(fp, "%s", t.text);
405 sfree(t.text);
406 if (newline)
407 fputc('\n', fp);
408 }
409
410 static void man_codepara(FILE *fp, word *text) {
411 fprintf(fp, ".nf\n");
412 for (; text; text = text->next) if (text->type == word_WeakCode) {
413 char *c;
414 wchar_t *t, *e;
415 int quote_props = QUOTE_INITCTRL;
416
417 t = text->text;
418 if (text->next && text->next->type == word_Emph) {
419 e = text->next->text;
420 text = text->next;
421 } else
422 e = NULL;
423
424 while (e && *e && *t) {
425 int n;
426 int ec = *e;
427
428 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
429 if (ec == 'i')
430 fprintf(fp, "\\fI");
431 else if (ec == 'b')
432 fprintf(fp, "\\fB");
433 man_convert(t, n, &c, quote_props);
434 quote_props &= ~QUOTE_INITCTRL;
435 fprintf(fp, "%s", c);
436 sfree(c);
437 if (ec == 'i' || ec == 'b')
438 fprintf(fp, "\\fP");
439 t += n;
440 e += n;
441 }
442 man_convert(t, 0, &c, quote_props);
443 fprintf(fp, "%s\n", c);
444 sfree(c);
445 }
446 fprintf(fp, ".fi\n");
447 }