X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/edf3198063825f7cc83b3b67305b84a0f5267291..8f664e7e91c918cd13248f6b684580c4dd2cdb31:/bk_man.c diff --git a/bk_man.c b/bk_man.c index 553b2dd..bb4e5b5 100644 --- a/bk_man.c +++ b/bk_man.c @@ -13,7 +13,7 @@ typedef struct { int mindepth; char *filename; int charset; - wchar_t *bullet, *lquote, *rquote; + wchar_t *bullet, *rule, *lquote, *rquote; } manconfig; static void man_text(FILE *, word *, @@ -23,6 +23,97 @@ static int man_convert(wchar_t const *s, int maxlen, char **result, int quote_props, int charset, charset_state *state); +/* + * My TROFF reference is "NROFF/TROFF User's Manual", Joseph + * F. Ossanna, October 11 1976. + * + * not yet used: + * \(ru rule + * \(pl math plus + * \(mi math minus + * \(eq math equals + * \(ga grave accent + * \(ul underrule + * \(sl slash (matching backslash) + * \(br box vertical rule + * \(bs Bell System logo + * \(or or + * all characters for constructing large brackets + */ + +static struct { + unsigned short uni; + char const *troff; +} const man_charmap[] = { + {0x00A2, "\\(ct"}, {0x00A7, "\\(sc"}, {0x00A9, "\\(co"}, {0x00AC, "\\(no"}, + {0x00AE, "\\(rg"}, {0x00B0, "\\(de"}, {0x00B1, "\\(+-"}, {0x00B4, "\\(aa"}, + {0x00BC, "\\(14"}, {0x00BD, "\\(12"}, {0x00BE, "\\(34"}, {0x00D7, "\\(mu"}, + {0x00F7, "\\(di"}, + + {0x0391, "\\(*A"}, {0x0392, "\\(*B"}, {0x0393, "\\(*G"}, {0x0394, "\\(*D"}, + {0x0395, "\\(*E"}, {0x0396, "\\(*Z"}, {0x0397, "\\(*Y"}, {0x0398, "\\(*H"}, + {0x0399, "\\(*I"}, {0x039A, "\\(*K"}, {0x039B, "\\(*L"}, {0x039C, "\\(*M"}, + {0x039D, "\\(*N"}, {0x039E, "\\(*C"}, {0x039F, "\\(*O"}, {0x03A0, "\\(*P"}, + {0x03A1, "\\(*R"}, {0x03A3, "\\(*S"}, {0x03A4, "\\(*T"}, {0x03A5, "\\(*U"}, + {0x03A6, "\\(*F"}, {0x03A7, "\\(*X"}, {0x03A8, "\\(*Q"}, {0x03A9, "\\(*W"}, + {0x03B1, "\\(*a"}, {0x03B2, "\\(*b"}, {0x03B3, "\\(*g"}, {0x03B4, "\\(*d"}, + {0x03B5, "\\(*e"}, {0x03B6, "\\(*z"}, {0x03B7, "\\(*y"}, {0x03B8, "\\(*h"}, + {0x03B9, "\\(*i"}, {0x03BA, 
"\\(*k"}, {0x03BB, "\\(*l"}, {0x03BC, "\\(*m"}, + {0x03BD, "\\(*n"}, {0x03BE, "\\(*c"}, {0x03BF, "\\(*o"}, {0x03C0, "\\(*p"}, + {0x03C1, "\\(*r"}, {0x03C2, "\\(ts"}, {0x03C3, "\\(*s"}, {0x03C4, "\\(*t"}, + {0x03C5, "\\(*u"}, {0x03C6, "\\(*f"}, {0x03C7, "\\(*x"}, {0x03C8, "\\(*q"}, + {0x03C9, "\\(*w"}, + + {0x2014, "\\(em"}, {0x2018, "`"}, {0x2019, "'"}, {0x2020, "\\(dg"}, + {0x2021, "\\(dd"}, {0x2022, "\\(bu"}, {0x2032, "\\(fm"}, + + {0x2190, "\\(<-"}, {0x2191, "\\(ua"}, {0x2192, "\\(->"}, {0x2193, "\\(da"}, + + {0x2202, "\\(pd"}, {0x2205, "\\(es"}, {0x2207, "\\(gr"}, {0x2208, "\\(mo"}, + {0x2212, "\\-"}, {0x2217, "\\(**"}, {0x221A, "\\(sr"}, {0x221D, "\\(pt"}, + {0x221E, "\\(if"}, {0x2229, "\\(ca"}, {0x222A, "\\(cu"}, {0x222B, "\\(is"}, + {0x223C, "\\(ap"}, {0x2245, "\\(~="}, {0x2260, "\\(!="}, {0x2261, "\\(=="}, + {0x2264, "\\(<="}, {0x2265, "\\(>="}, {0x2282, "\\(sb"}, {0x2283, "\\(sp"}, + {0x2286, "\\(ib"}, {0x2287, "\\(ip"}, + + {0x25A1, "\\(sq"}, {0x25CB, "\\(ci"}, + + {0x261C, "\\(lh"}, {0x261E, "\\(rh"}, +}; + +static char const *troffchar(int unichar) { + int i, j, k; + + i = -1; + j = lenof(man_charmap); + while (j-i > 1) { + k = (i + j) / 2; + if (man_charmap[k].uni == unichar) + return man_charmap[k].troff; + else if (man_charmap[k].uni > unichar) + j = k; + else + i = k; + } + return NULL; +} + +/* + * Return TRUE if we can represent the whole of the given string either + * in the output charset or as named characters; FALSE otherwise. 
+ */ +static int troff_ok(int charset, wchar_t *string) { + wchar_t test[2]; + while (*string) { + test[0] = *string; + test[1] = 0; + if (!cvt_ok(charset, test) && !troffchar(*string)) + return FALSE; + string++; + } + return TRUE; +} + static manconfig man_configure(paragraph *source) { paragraph *p; manconfig ret; @@ -36,6 +127,7 @@ static manconfig man_configure(paragraph *source) { ret.filename = dupstr("output.1"); ret.charset = CS_ASCII; ret.bullet = L"\x2022\0o\0\0"; + ret.rule = L"\x2500\0-\0\0"; ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0"; ret.rquote = uadv(ret.lquote); @@ -79,6 +171,8 @@ static manconfig man_configure(paragraph *source) { ret.filename = dupstr(adv(p->origkeyword)); } else if (!ustricmp(p->keyword, L"man-bullet")) { ret.bullet = uadv(p->keyword); + } else if (!ustricmp(p->keyword, L"man-rule")) { + ret.rule = uadv(p->keyword); } else if (!ustricmp(p->keyword, L"man-quotes")) { if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) { ret.lquote = uadv(p->keyword); @@ -89,19 +183,24 @@ static manconfig man_configure(paragraph *source) { } /* - * Now process fallbacks on quote characters and bullets. + * Now process fallbacks on quote characters, bullets, and the + * rule character. */ while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) && - (!cvt_ok(ret.charset, ret.lquote) || - !cvt_ok(ret.charset, ret.rquote))) { + (!troff_ok(ret.charset, ret.lquote) || + !troff_ok(ret.charset, ret.rquote))) { ret.lquote = uadv(ret.rquote); ret.rquote = uadv(ret.lquote); } while (*ret.bullet && *uadv(ret.bullet) && - !cvt_ok(ret.charset, ret.bullet)) + !troff_ok(ret.charset, ret.bullet)) ret.bullet = uadv(ret.bullet); + while (*ret.rule && *uadv(ret.rule) && + !troff_ok(ret.charset, ret.rule)) + ret.rule = uadv(ret.rule); + return ret; } @@ -116,8 +215,9 @@ paragraph *man_config_filename(char *filename) return cmdline_cfg_simple("man-filename", filename, NULL); } -#define QUOTE_INITCTRL 1 /* quote initial . 
and ' on a line */ -#define QUOTE_QUOTES 2 /* quote double quotes by doubling them */ +#define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */ +#define QUOTE_QUOTES 2 /* quote double quotes by doubling them */ +#define QUOTE_LITERAL 4 /* defeat special meaning of `, ', - in troff */ void man_backend(paragraph *sourceform, keywordlist *keywords, indexdata *idx, void *unused) { @@ -135,7 +235,10 @@ void man_backend(paragraph *sourceform, keywordlist *keywords, /* * Open the output file. */ - fp = fopen(conf.filename, "w"); + if (!strcmp(conf.filename, "-")) + fp = stdout; + else + fp = fopen(conf.filename, "w"); if (!fp) { error(err_cantopenw, conf.filename); return; @@ -148,6 +251,12 @@ void man_backend(paragraph *sourceform, keywordlist *keywords, man_text(fp, p->words, TRUE, 0, &conf); } + /* Standard preamble */ + /* Dodge to try to get literal U+0027 in output when required, + * bypassing groff's Unicode transform; pinched from pod2man */ + fprintf(fp, ".ie \\n(.g .ds Aq \\(aq\n" + ".el .ds Aq '\n"); + /* .TH name-of-program manual-section */ fprintf(fp, ".TH"); if (conf.th && *conf.th) { @@ -288,12 +397,19 @@ void man_backend(paragraph *sourceform, keywordlist *keywords, break; case para_Rule: - /* - * This isn't terribly good. Anyone who wants to do better - * should feel free! - */ - cleanup_described_thing; - fprintf(fp, ".PP\n----------------------------------------\n"); + { + char *ruletext; + /* + * New paragraph containing a horizontal line 1/2em above + * the baseline, or a line of rule characters, whose + * length is the line length minus the current indent. + */ + cleanup_described_thing; + man_convert(conf.rule, -1, &ruletext, 0, conf.charset, NULL); + fprintf(fp, ".PP\n.ie t \\u\\l'\\n(.lu-\\n(.iu'\\d\n" + ".el \\l'\\n(.lu-\\n(.iu\\&%s'\n", ruletext); + sfree(ruletext); + } break; case para_LcontPush: @@ -312,7 +428,8 @@ void man_backend(paragraph *sourceform, keywordlist *keywords, /* * Tidy up. 
*/ - fclose(fp); + if (fp != stdout) + fclose(fp); man_conf_cleanup(conf); } @@ -339,6 +456,7 @@ static int man_convert(wchar_t const *s, int maxlen, char *p = NULL, *q; int plen = 0, psize = 0; rdstringc out = {0, 0, NULL}; + int anyerr = 0; if (!state) state = &internal_state; @@ -354,15 +472,69 @@ static int man_convert(wchar_t const *s, int maxlen, err = 0; while (slen > 0) { - int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen, - charset, state, (err ? NULL : &err)); - if (ret > 0) { - plen += ret; - if (psize - plen < 256) { - psize = plen + 256; - p = sresize(p, psize, char); + int ret = charset_from_unicode(&s, &slen, p, psize, + charset, state, &err); + plen = ret; + + for (q = p; q < p+plen; q++) { + if (q == p && (*q == '.' || *q == '\'') && + (quote_props & QUOTE_INITCTRL)) { + /* + * Control character (. or ') at the start of a + * line. Quote it by putting \& (troff zero-width + * space) before it. + */ + rdaddc(&out, '\\'); + rdaddc(&out, '&'); } + if (*q == '`' || *q == ' ') { + /* Quote backticks and nonbreakable spaces always. */ + rdaddc(&out, '\\'); + } else if (*q == '\\') { + /* Turn backslashes into \e. */ + rdaddsc(&out, "\\e"); + continue; + } else if (*q == '-') { + if (quote_props & QUOTE_LITERAL) { + /* + * Try to preserve literal U+002D. + * This is quite awkward. Debian hacks groff so that + * \- and - both produce it; elsewhere it's not necessarily + * possible to get it. + * Apparently \- is the preferred compromise despite + * having minus-sign semantics, as it is non-breaking. + * (pod2man uses it, anyway.) + */ + rdaddc(&out, '\\'); + } else { + /* Turn nonbreakable hyphens into \(hy. 
*/ + rdaddsc(&out, "\\(hy"); + continue; + } + } else if (*q == '\'' && (quote_props & QUOTE_LITERAL)) { + /* Try to preserve literal U+0027 (using string defined + * in preamble) */ + rdaddsc(&out, "\\*(Aq"); /* "apostrophe quote" */ + continue; + } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) { + /* + * Double quote within double quotes. Quote it by + * doubling. + */ + rdaddc(&out, '"'); + } + rdaddc(&out, *q); + } + if (err) { + char const *tr = troffchar(*s); + if (tr == NULL) + anyerr = TRUE; + else + rdaddsc(&out, tr); + s++; slen--; } + /* Past start of string -- no more quoting needed */ + quote_props &= ~QUOTE_INITCTRL; } if (state == &internal_state || s == NULL) { @@ -372,31 +544,6 @@ static int man_convert(wchar_t const *s, int maxlen, plen += ret; } - for (q = p; q < p+plen; q++) { - if (q == p && (*q == '.' || *q == '\'') && - (quote_props & QUOTE_INITCTRL)) { - /* - * Control character (. or ') at the start of a - * line. Quote it by putting \& (troff zero-width - * space) before it. - */ - rdaddc(&out, '\\'); - rdaddc(&out, '&'); - } else if (*q == '\\') { - /* - * Quote backslashes by doubling them, always. - */ - rdaddc(&out, '\\'); - } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) { - /* - * Double quote within double quotes. Quote it by - * doubling. 
- */ - rdaddc(&out, '"'); - } - rdaddc(&out, *q); - } - sfree(p); if (out.text) @@ -404,7 +551,27 @@ static int man_convert(wchar_t const *s, int maxlen, else *result = dupstr(""); - return !err; + return !anyerr; +} + +static int man_rdaddwc_reset(rdstringc *rs, int quote_props, manconfig *conf, + charset_state *state) { + char *c; + + man_convert(NULL, 0, &c, quote_props, conf->charset, state); + rdaddsc(rs, c); + if (*c) + quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ + sfree(c); + *state = charset_init_state; + return quote_props; +} + +static int man_rdaddctrl(rdstringc *rs, char *c, int quote_props, + manconfig *conf, charset_state *state) { + quote_props = man_rdaddwc_reset(rs, quote_props, conf, state); + rdaddsc(rs, c); + return quote_props; } static int man_rdaddwc(rdstringc *rs, word *text, word *end, @@ -439,46 +606,50 @@ static int man_rdaddwc(rdstringc *rs, word *text, word *end, if (towordstyle(text->type) == word_Emph && (attraux(text->aux) == attr_First || attraux(text->aux) == attr_Only)) { - man_convert(NULL, 0, &c, quote_props, conf->charset, state); - rdaddsc(rs, c); - if (*c) - quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ - sfree(c); - *state = charset_init_state; - rdaddsc(rs, "\\fI"); + quote_props = man_rdaddctrl(rs, "\\fI", quote_props, conf, state); } else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && (attraux(text->aux) == attr_First || attraux(text->aux) == attr_Only)) { - man_convert(NULL, 0, &c, quote_props, conf->charset, state); - rdaddsc(rs, c); - if (*c) - quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ - sfree(c); - *state = charset_init_state; - rdaddsc(rs, "\\fB"); + quote_props = man_rdaddctrl(rs, "\\fB", quote_props, conf, state); } + if (towordstyle(text->type) == word_Code || + towordstyle(text->type) == word_WeakCode) + quote_props |= QUOTE_LITERAL; + if (removeattr(text->type) == word_Normal) { charset_state s2 = *state; + int len = 
ustrlen(text->text), hyphen = FALSE; - if (man_convert(text->text, 0, &c, quote_props, conf->charset, &s2) || + if (text->breaks && text->text[len - 1] == '-') { + len--; + hyphen = TRUE; + } + if (len == 0 || + man_convert(text->text, len, &c, quote_props, conf->charset, + &s2) || !text->alt) { - rdaddsc(rs, c); - if (*c) - quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ - *state = s2; + if (len != 0) { + rdaddsc(rs, c); + if (*c) + quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ + *state = s2; + } + if (hyphen) { + quote_props = + man_rdaddctrl(rs, "-", quote_props, conf, state); + quote_props &= ~QUOTE_INITCTRL; + } } else { quote_props = man_rdaddwc(rs, text->alt, NULL, quote_props, conf, state); } - sfree(c); + if (len != 0) + sfree(c); } else if (removeattr(text->type) == word_WhiteSpace) { - man_convert(L" ", 1, &c, quote_props, conf->charset, state); - rdaddsc(rs, c); - if (*c) - quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ - sfree(c); + quote_props = man_rdaddctrl(rs, " ", quote_props, conf, state); + quote_props &= ~QUOTE_INITCTRL; } else if (removeattr(text->type) == word_Quote) { man_convert(quoteaux(text->aux) == quote_Open ? 
conf->lquote : conf->rquote, 0, @@ -491,21 +662,11 @@ static int man_rdaddwc(rdstringc *rs, word *text, word *end, if (towordstyle(text->type) != word_Normal && (attraux(text->aux) == attr_Last || attraux(text->aux) == attr_Only)) { - man_convert(NULL, 0, &c, quote_props, conf->charset, state); - rdaddsc(rs, c); - if (*c) - quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ - sfree(c); - *state = charset_init_state; - rdaddsc(rs, "\\fP"); + quote_props = man_rdaddctrl(rs, "\\fP", quote_props, conf, state); } break; } - man_convert(NULL, 0, &c, quote_props, conf->charset, state); - rdaddsc(rs, c); - if (*c) - quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ - sfree(c); + quote_props = man_rdaddwc_reset(rs, quote_props, conf, state); return quote_props; } @@ -527,7 +688,7 @@ static void man_codepara(FILE *fp, word *text, int charset) { for (; text; text = text->next) if (text->type == word_WeakCode) { char *c; wchar_t *t, *e; - int quote_props = QUOTE_INITCTRL; + int quote_props = QUOTE_INITCTRL | QUOTE_LITERAL; t = text->text; if (text->next && text->next->type == word_Emph) {