2 * man page backend for Halibut
16 wchar_t *bullet
, *rule
, *lquote
, *rquote
;
/*
 * Forward declaration of man_text(), defined later in this file.
 * It renders a list of words to the given stream: the definition
 * builds the text with man_rdaddwc() (adding QUOTE_INITCTRL to the
 * caller's `quote_props') and writes the result with fprintf().
 * `quote_props' is a mask of the QUOTE_* flags; `conf' supplies the
 * charset and quote characters.
 * NOTE(review): `newline' presumably selects whether a trailing
 * newline is written -- confirm against the full definition.
 */
19 static void man_text(FILE *, word
*,
20 int newline
, int quote_props
, manconfig
*conf
);
/*
 * Forward declaration of man_codepara(), defined later in this file.
 * The definition brackets its output with ".nf" and ".fi", walks the
 * word list handling each word_WeakCode entry (looking at a following
 * word_Emph entry, if any), and converts the text with man_convert()
 * in the given `charset'.
 */
21 static void man_codepara(FILE *, word
*, int charset
);
/*
 * Forward declaration of man_convert(), defined later in this file:
 * converts the wide string `s' into a newly allocated char string
 * stored in `*result', applying the QUOTE_* escaping selected by
 * `quote_props' for the output `charset'. `state' may be NULL, in
 * which case the function uses its own conversion state. The full
 * contract is described in the comment on the definition.
 */
22 static int man_convert(wchar_t const *s
, int maxlen
,
23 char **result
, int quote_props
,
24 int charset
, charset_state
*state
);
27 * My TROFF reference is "NROFF/TROFF User's Manual", Joseph
28 * F. Ossana, October 11 1976.
37 * \(sl slash (matching backslash)
38 * \(br box vertical rule
39 * \(bs Bell System logo
41 * all characters for constructing large brackets
47 } const man_charmap
[] = {
48 {0x00A2, "\\(ct"}, {0x00A7, "\\(sc"}, {0x00A9, "\\(co"}, {0x00AC, "\\(no"},
49 {0x00AE, "\\(rg"}, {0x00B0, "\\(de"}, {0x00B1, "\\(+-"}, {0x00B4, "\\(aa"},
50 {0x00BC, "\\(14"}, {0x00BD, "\\(12"}, {0x00BE, "\\(34"}, {0x00D7, "\\(mu"},
53 {0x0391, "\\(*A"}, {0x0392, "\\(*B"}, {0x0393, "\\(*G"}, {0x0394, "\\(*D"},
54 {0x0395, "\\(*E"}, {0x0396, "\\(*Z"}, {0x0397, "\\(*Y"}, {0x0398, "\\(*H"},
55 {0x0399, "\\(*I"}, {0x039A, "\\(*K"}, {0x039B, "\\(*L"}, {0x039C, "\\(*M"},
56 {0x039D, "\\(*N"}, {0x039E, "\\(*C"}, {0x039F, "\\(*O"}, {0x03A0, "\\(*P"},
57 {0x03A1, "\\(*R"}, {0x03A3, "\\(*S"}, {0x03A4, "\\(*T"}, {0x03A5, "\\(*U"},
58 {0x03A6, "\\(*F"}, {0x03A7, "\\(*X"}, {0x03A8, "\\(*Q"}, {0x03A9, "\\(*W"},
59 {0x03B1, "\\(*a"}, {0x03B2, "\\(*b"}, {0x03B3, "\\(*g"}, {0x03B4, "\\(*d"},
60 {0x03B5, "\\(*e"}, {0x03B6, "\\(*z"}, {0x03B7, "\\(*y"}, {0x03B8, "\\(*h"},
61 {0x03B9, "\\(*i"}, {0x03BA, "\\(*k"}, {0x03BB, "\\(*l"}, {0x03BC, "\\(*m"},
62 {0x03BD, "\\(*n"}, {0x03BE, "\\(*c"}, {0x03BF, "\\(*o"}, {0x03C0, "\\(*p"},
63 {0x03C1, "\\(*r"}, {0x03C2, "\\(ts"}, {0x03C3, "\\(*s"}, {0x03C4, "\\(*t"},
64 {0x03C5, "\\(*u"}, {0x03C6, "\\(*f"}, {0x03C7, "\\(*x"}, {0x03C8, "\\(*q"},
67 {0x2014, "\\(em"}, {0x2018, "`"}, {0x2019, "'"}, {0x2020, "\\(dg"},
68 {0x2021, "\\(dd"}, {0x2022, "\\(bu"}, {0x2032, "\\(fm"},
70 {0x2190, "\\(<-"}, {0x2191, "\\(ua"}, {0x2192, "\\(->"}, {0x2193, "\\(da"},
72 {0x2202, "\\(pd"}, {0x2205, "\\(es"}, {0x2207, "\\(gr"}, {0x2208, "\\(mo"},
73 {0x2212, "\\-"}, {0x2217, "\\(**"}, {0x221A, "\\(sr"}, {0x221D, "\\(pt"},
74 {0x221E, "\\(if"}, {0x2229, "\\(ca"}, {0x222A, "\\(cu"}, {0x222B, "\\(is"},
75 {0x223C, "\\(ap"}, {0x2245, "\\(~="}, {0x2260, "\\(!="}, {0x2261, "\\(=="},
76 {0x2264, "\\(<="}, {0x2265, "\\(>="}, {0x2282, "\\(sb"}, {0x2283, "\\(sp"},
77 {0x2286, "\\(ib"}, {0x2287, "\\(ip"},
79 {0x25A1, "\\(sq"}, {0x25CB, "\\(ci"},
81 {0x261C, "\\(lh"}, {0x261E, "\\(rh"},
84 static char const *troffchar(int unichar
) {
88 j
= lenof(man_charmap
);
91 if (man_charmap
[k
].uni
== unichar
)
92 return man_charmap
[k
].troff
;
93 else if (man_charmap
[k
].uni
> unichar
)
102 * Return TRUE if we can represent the whole of the given string either
103 * in the output charset or as named characters; FALSE otherwise.
105 static int troff_ok(int charset
, wchar_t *string
) {
110 if (!cvt_ok(charset
, test
) && !troffchar(*string
))
117 static manconfig
man_configure(paragraph
*source
) {
125 ret
.headnumbers
= FALSE
;
127 ret
.filename
= dupstr("output.1");
128 ret
.charset
= CS_ASCII
;
129 ret
.bullet
= L
"\x2022\0o\0\0";
130 ret
.rule
= L
"\x2500\0-\0\0";
131 ret
.lquote
= L
"\x2018\0\x2019\0\"\0\"\0\0";
132 ret
.rquote
= uadv(ret
.lquote
);
135 * Two-pass configuration so that we can pick up global config
136 * (e.g. `quotes') before having it overridden by specific
137 * config (`man-quotes'), irrespective of the order in which
140 for (p
= source
; p
; p
= p
->next
) {
141 if (p
->type
== para_Config
) {
142 if (!ustricmp(p
->keyword
, L
"quotes")) {
143 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
144 ret
.lquote
= uadv(p
->keyword
);
145 ret
.rquote
= uadv(ret
.lquote
);
151 for (p
= source
; p
; p
= p
->next
) {
152 if (p
->type
== para_Config
) {
153 if (!ustricmp(p
->keyword
, L
"man-identity")) {
156 wp
= uadv(p
->keyword
);
161 ret
.th
= snewn(ep
- wp
+ 1, wchar_t);
162 memcpy(ret
.th
, wp
, (ep
- wp
+ 1) * sizeof(wchar_t));
163 } else if (!ustricmp(p
->keyword
, L
"man-charset")) {
164 ret
.charset
= charset_from_ustr(&p
->fpos
, uadv(p
->keyword
));
165 } else if (!ustricmp(p
->keyword
, L
"man-headnumbers")) {
166 ret
.headnumbers
= utob(uadv(p
->keyword
));
167 } else if (!ustricmp(p
->keyword
, L
"man-mindepth")) {
168 ret
.mindepth
= utoi(uadv(p
->keyword
));
169 } else if (!ustricmp(p
->keyword
, L
"man-filename")) {
171 ret
.filename
= dupstr(adv(p
->origkeyword
));
172 } else if (!ustricmp(p
->keyword
, L
"man-bullet")) {
173 ret
.bullet
= uadv(p
->keyword
);
174 } else if (!ustricmp(p
->keyword
, L
"man-rule")) {
175 ret
.rule
= uadv(p
->keyword
);
176 } else if (!ustricmp(p
->keyword
, L
"man-quotes")) {
177 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
178 ret
.lquote
= uadv(p
->keyword
);
179 ret
.rquote
= uadv(ret
.lquote
);
186 * Now process fallbacks on quote characters, bullets, and the
189 while (*uadv(ret
.rquote
) && *uadv(uadv(ret
.rquote
)) &&
190 (!troff_ok(ret
.charset
, ret
.lquote
) ||
191 !troff_ok(ret
.charset
, ret
.rquote
))) {
192 ret
.lquote
= uadv(ret
.rquote
);
193 ret
.rquote
= uadv(ret
.lquote
);
196 while (*ret
.bullet
&& *uadv(ret
.bullet
) &&
197 !troff_ok(ret
.charset
, ret
.bullet
))
198 ret
.bullet
= uadv(ret
.bullet
);
200 while (*ret
.rule
&& *uadv(ret
.rule
) &&
201 !troff_ok(ret
.charset
, ret
.rule
))
202 ret
.rule
= uadv(ret
.rule
);
207 static void man_conf_cleanup(manconfig cf
)
213 paragraph
*man_config_filename(char *filename
)
215 return cmdline_cfg_simple("man-filename", filename
, NULL
);
218 #define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */
219 #define QUOTE_QUOTES 2 /* quote double quotes by doubling them */
220 #define QUOTE_LITERAL 4 /* defeat special meaning of `, ', - in troff */
222 void man_backend(paragraph
*sourceform
, keywordlist
*keywords
,
223 indexdata
*idx
, void *unused
) {
227 int had_described_thing
;
233 conf
= man_configure(sourceform
);
236 * Open the output file.
238 fp
= fopen(conf
.filename
, "w");
240 error(err_cantopenw
, conf
.filename
);
244 /* Do the version ID */
245 for (p
= sourceform
; p
; p
= p
->next
)
246 if (p
->type
== para_VersionID
) {
247 fprintf(fp
, ".\\\" ");
248 man_text(fp
, p
->words
, TRUE
, 0, &conf
);
251 /* Standard preamble */
252 /* Dodge to try to get literal U+0027 in output when required,
253 * bypassing groff's Unicode transform; pinched from pod2man */
254 fprintf(fp
, ".ie \\n(.g .ds Aq \\(aq\n"
257 /* .TH name-of-program manual-section */
259 if (conf
.th
&& *conf
.th
) {
263 for (wp
= conf
.th
; *wp
; wp
= uadv(wp
)) {
265 man_convert(wp
, 0, &c
, QUOTE_QUOTES
, conf
.charset
, NULL
);
273 had_described_thing
= FALSE
;
274 #define cleanup_described_thing do { \
275 if (had_described_thing) \
277 had_described_thing = FALSE; \
280 for (p
= sourceform
; p
; p
= p
->next
) switch (p
->type
) {
282 * Things we ignore because we've already processed them or
283 * aren't going to touch them in this pass.
287 case para_Biblio
: /* only touch BiblioCited */
298 case para_UnnumberedChapter
:
302 cleanup_described_thing
;
305 if (p
->type
== para_Subsect
)
307 else if (p
->type
== para_Heading
)
311 if (depth
>= conf
.mindepth
) {
312 if (depth
> conf
.mindepth
)
313 fprintf(fp
, ".SS \"");
315 fprintf(fp
, ".SH \"");
316 if (conf
.headnumbers
&& p
->kwtext
) {
317 man_text(fp
, p
->kwtext
, FALSE
, QUOTE_QUOTES
, &conf
);
320 man_text(fp
, p
->words
, FALSE
, QUOTE_QUOTES
, &conf
);
330 cleanup_described_thing
;
331 fprintf(fp
, ".PP\n");
332 man_codepara(fp
, p
->words
, conf
.charset
);
340 cleanup_described_thing
;
341 fprintf(fp
, ".PP\n");
342 man_text(fp
, p
->words
, TRUE
, 0, &conf
);
348 case para_Description
:
349 case para_BiblioCited
:
351 case para_NumberedList
:
352 if (p
->type
!= para_Description
)
353 cleanup_described_thing
;
355 if (p
->type
== para_Bullet
) {
357 man_convert(conf
.bullet
, -1, &bullettext
, QUOTE_QUOTES
,
359 fprintf(fp
, ".IP \"\\fB%s\\fP\"\n", bullettext
);
361 } else if (p
->type
== para_NumberedList
) {
362 fprintf(fp
, ".IP \"");
363 man_text(fp
, p
->kwtext
, FALSE
, QUOTE_QUOTES
, &conf
);
365 } else if (p
->type
== para_Description
) {
366 if (had_described_thing
) {
368 * Do nothing; the .xP for this paragraph is the
369 * .IP which has come before it in the
374 * A \dd without a preceding \dt is given a blank
377 fprintf(fp
, ".IP \"\"\n");
379 } else if (p
->type
== para_BiblioCited
) {
380 fprintf(fp
, ".IP \"");
381 man_text(fp
, p
->kwtext
, FALSE
, QUOTE_QUOTES
, &conf
);
384 man_text(fp
, p
->words
, TRUE
, 0, &conf
);
385 had_described_thing
= FALSE
;
388 case para_DescribedThing
:
389 cleanup_described_thing
;
390 fprintf(fp
, ".IP \"");
391 man_text(fp
, p
->words
, FALSE
, QUOTE_QUOTES
, &conf
);
393 had_described_thing
= TRUE
;
400 * New paragraph containing a horizontal line 1/2em above
401 * the baseline, or a line of rule characters, whose
402 * length is the line length minus the current indent.
404 cleanup_described_thing
;
405 man_convert(conf
.rule
, -1, &ruletext
, 0, conf
.charset
, NULL
);
406 fprintf(fp
, ".PP\n.ie t \\u\\l'\\n(.lu-\\n(.iu'\\d\n"
407 ".el \\l'\\n(.lu-\\n(.iu\\&%s'\n", ruletext
);
414 cleanup_described_thing
;
415 fprintf(fp
, ".RS\n");
419 cleanup_described_thing
;
420 fprintf(fp
, ".RE\n");
423 cleanup_described_thing
;
429 man_conf_cleanup(conf
);
433 * Convert a wide string into a string of chars; mallocs the
434 * resulting string and stores a pointer to it in `*result'.
436 * If `state' is non-NULL, updates the charset state pointed to. If
437 * `state' is NULL, this function uses its own state, initialises
438 * it from scratch, and cleans it up when finished. If `state' is
439 * non-NULL but _s_ is NULL, cleans up a provided state.
441 * Return is nonzero if all characters are OK. If not all
442 * characters are OK but `result' is non-NULL, a result _will_
443 * still be generated!
445 * This function also does escaping of groff special characters.
447 static int man_convert(wchar_t const *s
, int maxlen
,
448 char **result
, int quote_props
,
449 int charset
, charset_state
*state
) {
450 charset_state internal_state
= CHARSET_INIT_STATE
;
453 int plen
= 0, psize
= 0;
454 rdstringc out
= {0, 0, NULL
};
458 state
= &internal_state
;
460 slen
= (s ?
ustrlen(s
) : 0);
462 if (slen
> maxlen
&& maxlen
> 0)
467 p
= snewn(psize
, char);
471 int ret
= charset_from_unicode(&s
, &slen
, p
, psize
,
472 charset
, state
, &err
);
475 for (q
= p
; q
< p
+plen
; q
++) {
476 if (q
== p
&& (*q
== '.' || *q
== '\'') &&
477 (quote_props
& QUOTE_INITCTRL
)) {
479 * Control character (. or ') at the start of a
480 * line. Quote it by putting \& (troff zero-width
486 if (*q
== '`' || *q
== ' ') {
487 /* Quote backticks and nonbreakable spaces always. */
489 } else if (*q
== '\\') {
490 /* Turn backslashes into \e. */
491 rdaddsc(&out
, "\\e");
493 } else if (*q
== '-' && !(quote_props
& QUOTE_LITERAL
)) {
494 /* Turn nonbreakable hyphens into \(hy. */
495 rdaddsc(&out
, "\\(hy");
497 } else if (*q
== '\'' && (quote_props
& QUOTE_LITERAL
)) {
498 /* Try to preserve literal U+0027 (using string defined
500 rdaddsc(&out
, "\\*(Aq"); /* "apostrophe quote" */
502 } else if (*q
== '"' && (quote_props
& QUOTE_QUOTES
)) {
504 * Double quote within double quotes. Quote it by
512 char const *tr
= troffchar(*s
);
519 /* Past start of string -- no more quoting needed */
520 quote_props
&= ~QUOTE_INITCTRL
;
523 if (state
== &internal_state
|| s
== NULL
) {
524 int ret
= charset_from_unicode(NULL
, 0, p
+plen
, psize
-plen
,
525 charset
, state
, NULL
);
533 *result
= rdtrimc(&out
);
535 *result
= dupstr("");
540 static int man_rdaddwc_reset(rdstringc
*rs
, int quote_props
, manconfig
*conf
,
541 charset_state
*state
) {
544 man_convert(NULL
, 0, &c
, quote_props
, conf
->charset
, state
);
547 quote_props
&= ~QUOTE_INITCTRL
; /* not at start any more */
549 *state
= charset_init_state
;
553 static int man_rdaddctrl(rdstringc
*rs
, char *c
, int quote_props
,
554 manconfig
*conf
, charset_state
*state
) {
555 quote_props
= man_rdaddwc_reset(rs
, quote_props
, conf
, state
);
560 static int man_rdaddwc(rdstringc
*rs
, word
*text
, word
*end
,
561 int quote_props
, manconfig
*conf
,
562 charset_state
*state
) {
565 for (; text
&& text
!= end
; text
= text
->next
) switch (text
->type
) {
578 case word_WhiteSpace
:
581 case word_WkCodeSpace
:
585 case word_WkCodeQuote
:
586 assert(text
->type
!= word_CodeQuote
&&
587 text
->type
!= word_WkCodeQuote
);
589 if (towordstyle(text
->type
) == word_Emph
&&
590 (attraux(text
->aux
) == attr_First
||
591 attraux(text
->aux
) == attr_Only
)) {
592 quote_props
= man_rdaddctrl(rs
, "\\fI", quote_props
, conf
, state
);
593 } else if ((towordstyle(text
->type
) == word_Code
||
594 towordstyle(text
->type
) == word_WeakCode
) &&
595 (attraux(text
->aux
) == attr_First
||
596 attraux(text
->aux
) == attr_Only
)) {
597 quote_props
= man_rdaddctrl(rs
, "\\fB", quote_props
, conf
, state
);
600 if (towordstyle(text
->type
) == word_Code
||
601 towordstyle(text
->type
) == word_WeakCode
)
602 quote_props
|= QUOTE_LITERAL
;
604 if (removeattr(text
->type
) == word_Normal
) {
605 charset_state s2
= *state
;
606 int len
= ustrlen(text
->text
), hyphen
= FALSE
;
608 if (text
->breaks
&& text
->text
[len
- 1] == '-') {
613 man_convert(text
->text
, len
, &c
, quote_props
, conf
->charset
,
619 quote_props
&= ~QUOTE_INITCTRL
; /* not at start any more */
624 man_rdaddctrl(rs
, "-", quote_props
, conf
, state
);
625 quote_props
&= ~QUOTE_INITCTRL
;
628 quote_props
= man_rdaddwc(rs
, text
->alt
, NULL
,
629 quote_props
, conf
, state
);
633 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
634 quote_props
= man_rdaddctrl(rs
, " ", quote_props
, conf
, state
);
635 quote_props
&= ~QUOTE_INITCTRL
;
636 } else if (removeattr(text
->type
) == word_Quote
) {
637 man_convert(quoteaux(text
->aux
) == quote_Open ?
638 conf
->lquote
: conf
->rquote
, 0,
639 &c
, quote_props
, conf
->charset
, state
);
642 quote_props
&= ~QUOTE_INITCTRL
; /* not at start any more */
645 if (towordstyle(text
->type
) != word_Normal
&&
646 (attraux(text
->aux
) == attr_Last
||
647 attraux(text
->aux
) == attr_Only
)) {
648 quote_props
= man_rdaddctrl(rs
, "\\fP", quote_props
, conf
, state
);
652 quote_props
= man_rdaddwc_reset(rs
, quote_props
, conf
, state
);
657 static void man_text(FILE *fp
, word
*text
, int newline
,
658 int quote_props
, manconfig
*conf
) {
659 rdstringc t
= { 0, 0, NULL
};
660 charset_state state
= CHARSET_INIT_STATE
;
662 man_rdaddwc(&t
, text
, NULL
, quote_props
| QUOTE_INITCTRL
, conf
, &state
);
663 fprintf(fp
, "%s", t
.text
);
669 static void man_codepara(FILE *fp
, word
*text
, int charset
) {
670 fprintf(fp
, ".nf\n");
671 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
674 int quote_props
= QUOTE_INITCTRL
| QUOTE_LITERAL
;
677 if (text
->next
&& text
->next
->type
== word_Emph
) {
678 e
= text
->next
->text
;
683 while (e
&& *e
&& *t
) {
687 for (n
= 0; t
[n
] && e
[n
] && e
[n
] == ec
; n
++);
692 man_convert(t
, n
, &c
, quote_props
, charset
, NULL
);
693 quote_props
&= ~QUOTE_INITCTRL
;
694 fprintf(fp
, "%s", c
);
696 if (ec
== 'i' || ec
== 'b')
701 man_convert(t
, 0, &c
, quote_props
, charset
, NULL
);
702 fprintf(fp
, "%s\n", c
);
705 fprintf(fp
, ".fi\n");