#include <assert.h>
#include "halibut.h"
-static void man_text(FILE *, word *, int newline, int quote_props);
-static void man_codepara(FILE *, word *);
-static int man_convert(wchar_t *s, int maxlen,
- char **result, int quote_props);
-
/*
 * Settings for the man page backend, as filled in by
 * man_configure().
 *
 *  - th: list of NUL-separated strings; man_backend() writes each
 *    one as a separate quoted argument on the .TH line.
 *  - headnumbers: if true, a heading's kwtext is written in front
 *    of the heading text.
 *  - mindepth: headings shallower than this are not written; a
 *    heading at exactly this depth uses .SH, deeper ones use .SS.
 *  - filename: set from `man-filename' (default "output.1").
 *  - charset: output character set handed to man_convert().
 *  - bullet, lquote, rquote: each points into a list of
 *    NUL-separated alternatives; man_configure() steps along the
 *    list until it reaches text that cvt_ok() accepts for `charset'
 *    or runs out of alternatives.
 */
typedef struct {
wchar_t *th;
int headnumbers;
int mindepth;
char *filename;
+ int charset;
+ wchar_t *bullet, *lquote, *rquote;
} manconfig;
+static void man_text(FILE *, word *,
+ int newline, int quote_props, manconfig *conf);
+static void man_codepara(FILE *, word *, int charset);
+static int man_convert(wchar_t const *s, int maxlen,
+ char **result, int quote_props,
+ int charset, charset_state *state);
+
/*
 * Build the man backend's configuration: start from built-in
 * defaults, then apply the para_Config paragraphs found in the
 * `source' paragraph list.
 *
 * NOTE(review): uadv() is defined elsewhere; from its use here it
 * appears to step to the string following the next NUL in a
 * NUL-separated list -- confirm against its definition.
 */
static manconfig man_configure(paragraph *source) {
+ paragraph *p;
manconfig ret;
/*
ret.headnumbers = FALSE;
ret.mindepth = 0;
ret.filename = dupstr("output.1");
+ ret.charset = CS_ASCII;
+ ret.bullet = L"\x2022\0o\0\0";
+ ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
+ ret.rquote = uadv(ret.lquote);
+
+ /*
+ * Two-pass configuration so that we can pick up global config
+ * (e.g. `quotes') before having it overridden by specific
+ * config (`man-quotes'), irrespective of the order in which
+ * they occur.
+ */
+ for (p = source; p; p = p->next) {
+ if (p->type == para_Config) {
+ if (!ustricmp(p->keyword, L"quotes")) {
+ if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+ ret.lquote = uadv(p->keyword);
+ ret.rquote = uadv(ret.lquote);
+ }
+ }
+ }
+ }
- for (; source; source = source->next) {
- if (source->type == para_Config) {
- if (!ustricmp(source->keyword, L"man-identity")) {
+ for (p = source; p; p = p->next) {
+ if (p->type == para_Config) {
+ if (!ustricmp(p->keyword, L"man-identity")) {
wchar_t *wp, *ep;
- wp = uadv(source->keyword);
+ wp = uadv(p->keyword);
ep = wp;
while (*ep)
ep = uadv(ep);
sfree(ret.th);
- ret.th = mknewa(wchar_t, ep - wp + 1);
+ ret.th = snewn(ep - wp + 1, wchar_t);
memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
- } else if (!ustricmp(source->keyword, L"man-headnumbers")) {
- ret.headnumbers = utob(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"man-mindepth")) {
- ret.mindepth = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"man-filename")) {
+ } else if (!ustricmp(p->keyword, L"man-charset")) {
+ ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"man-headnumbers")) {
+ ret.headnumbers = utob(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"man-mindepth")) {
+ ret.mindepth = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"man-filename")) {
sfree(ret.filename);
- ret.filename = utoa_dup(uadv(source->keyword));
+ ret.filename = dupstr(adv(p->origkeyword));
+ } else if (!ustricmp(p->keyword, L"man-bullet")) {
+ ret.bullet = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"man-quotes")) {
+ if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+ ret.lquote = uadv(p->keyword);
+ ret.rquote = uadv(ret.lquote);
+ }
}
}
}
+ /*
+ * Now process fallbacks on quote characters and bullets.
+ *
+ * Each of these settings is a list of NUL-separated
+ * alternatives. For the quotes, move on one open/close pair
+ * at a time for as long as a further complete pair follows
+ * and either half of the current pair fails cvt_ok() for the
+ * configured charset. The bullet list is walked the same
+ * way, one alternative at a time. In both cases the last
+ * alternative is kept even if it also fails cvt_ok().
+ */
+ while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
+ (!cvt_ok(ret.charset, ret.lquote) ||
+ !cvt_ok(ret.charset, ret.rquote))) {
+ ret.lquote = uadv(ret.rquote);
+ ret.rquote = uadv(ret.lquote);
+ }
+
+ while (*ret.bullet && *uadv(ret.bullet) &&
+ !cvt_ok(ret.charset, ret.bullet))
+ ret.bullet = uadv(ret.bullet);
+
return ret;
}
/*
 * Wrap a file name as a `man-filename' configuration paragraph,
 * the directive man_configure() reads to set the output file name.
 * The paragraph is now built by cmdline_cfg_simple() rather than by
 * hand; the hand-written version being removed tagged the paragraph
 * as coming from "<command line>".
 */
paragraph *man_config_filename(char *filename)
{
- paragraph *p;
- wchar_t *ufilename, *up;
- int len;
-
- p = mknew(paragraph);
- memset(p, 0, sizeof(*p));
- p->type = para_Config;
- p->next = NULL;
- p->fpos.filename = "<command line>";
- p->fpos.line = p->fpos.col = -1;
-
- ufilename = ufroma_dup(filename);
- len = ustrlen(ufilename) + 2 + lenof(L"man-filename");
- p->keyword = mknewa(wchar_t, len);
- up = p->keyword;
- ustrcpy(up, L"man-filename");
- up = uadv(up);
- ustrcpy(up, ufilename);
- up = uadv(up);
- *up = L'\0';
- assert(up - p->keyword < len);
- sfree(ufilename);
-
- return p;
+ return cmdline_cfg_simple("man-filename", filename, NULL);
}
#define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */
#define QUOTE_QUOTES 2 /* quote double quotes by doubling them */
/*
 * Man page backend entry point. Reads its settings with
 * man_configure(), then walks the paragraph list and writes troff
 * source to `fp': a comment line per version ID, the .TH line, and
 * then one request per paragraph (.SH or .SS for headings, .PP for
 * ordinary and code paragraphs, .IP for list items, .RS and .RE for
 * nested containers).
 *
 * `keywords', `idx' and `unused' are accepted but not used.
 */
void man_backend(paragraph *sourceform, keywordlist *keywords,
- indexdata *idx) {
+ indexdata *idx, void *unused) {
paragraph *p;
FILE *fp;
manconfig conf;
+ int had_described_thing;
- IGNORE(keywords); /* we don't happen to need this */
- IGNORE(idx); /* or this */
+ IGNORE(unused);
+ IGNORE(keywords);
+ IGNORE(idx);
conf = man_configure(sourceform);
for (p = sourceform; p; p = p->next)
if (p->type == para_VersionID) {
fprintf(fp, ".\\\" ");
- man_text(fp, p->words, TRUE, 0);
+ man_text(fp, p->words, TRUE, 0, &conf);
}
/* .TH name-of-program manual-section */
for (wp = conf.th; *wp; wp = uadv(wp)) {
fputs(" \"", fp);
- man_convert(wp, 0, &c, QUOTE_QUOTES);
+ man_convert(wp, 0, &c, QUOTE_QUOTES, conf.charset, NULL);
fputs(c, fp);
sfree(c);
fputc('"', fp);
}
fputc('\n', fp);
- fprintf(fp, ".UC\n");
+ had_described_thing = FALSE;
+#define cleanup_described_thing do { \
+ if (had_described_thing) \
+ fprintf(fp, "\n"); \
+ had_described_thing = FALSE; \
+} while (0)
for (p = sourceform; p; p = p->next) switch (p->type) {
/*
case para_Heading:
case para_Subsect:
+ cleanup_described_thing;
{
int depth;
if (p->type == para_Subsect)
- depth = p->aux + 2;
+ depth = p->aux + 1;
else if (p->type == para_Heading)
depth = 1;
else
depth = 0;
if (depth >= conf.mindepth) {
- fprintf(fp, ".SH \"");
+ if (depth > conf.mindepth)
+ fprintf(fp, ".SS \"");
+ else
+ fprintf(fp, ".SH \"");
if (conf.headnumbers && p->kwtext) {
- man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
+ man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
fprintf(fp, " ");
}
- man_text(fp, p->words, FALSE, QUOTE_QUOTES);
+ man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
fprintf(fp, "\"\n");
}
break;
* Code paragraphs.
*/
case para_Code:
+ cleanup_described_thing;
fprintf(fp, ".PP\n");
- man_codepara(fp, p->words);
+ man_codepara(fp, p->words, conf.charset);
break;
/*
*/
case para_Normal:
case para_Copyright:
+ cleanup_described_thing;
fprintf(fp, ".PP\n");
- man_text(fp, p->words, TRUE, 0);
+ man_text(fp, p->words, TRUE, 0, &conf);
break;
/*
case para_BiblioCited:
case para_Bullet:
case para_NumberedList:
+ if (p->type != para_Description)
+ cleanup_described_thing;
+
if (p->type == para_Bullet) {
- fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
+ char *bullettext;
+ man_convert(conf.bullet, -1, &bullettext, QUOTE_QUOTES,
+ conf.charset, NULL);
+ fprintf(fp, ".IP \"\\fB%s\\fP\"\n", bullettext);
+ sfree(bullettext);
} else if (p->type == para_NumberedList) {
fprintf(fp, ".IP \"");
- man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
+ man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
fprintf(fp, "\"\n");
} else if (p->type == para_Description) {
- /*
- * Do nothing; the .xP for this paragraph is the .IP
- * which has come before it in the DescribedThing.
- */
+ if (had_described_thing) {
+ /*
+ * Do nothing; the .xP for this paragraph is the
+ * .IP which has come before it in the
+ * DescribedThing.
+ */
+ } else {
+ /*
+ * A \dd without a preceding \dt is given a blank
+ * one. (had_described_thing is set by the
+ * para_DescribedThing case and cleared again by
+ * each of the other cases shown in this switch,
+ * so reaching here means the paragraph handled
+ * just before this one was not a
+ * DescribedThing.)
+ */
+ fprintf(fp, ".IP \"\"\n");
+ }
} else if (p->type == para_BiblioCited) {
fprintf(fp, ".IP \"");
- man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
+ man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
fprintf(fp, "\"\n");
}
- man_text(fp, p->words, TRUE, 0);
+ man_text(fp, p->words, TRUE, 0, &conf);
+ had_described_thing = FALSE;
break;
case para_DescribedThing:
+ cleanup_described_thing;
fprintf(fp, ".IP \"");
- man_text(fp, p->words, FALSE, QUOTE_QUOTES);
+ man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
fprintf(fp, "\"\n");
+ had_described_thing = TRUE;
break;
case para_Rule:
* This isn't terribly good. Anyone who wants to do better
* should feel free!
*/
+ cleanup_described_thing;
fprintf(fp, ".PP\n----------------------------------------\n");
break;
case para_LcontPush:
case para_QuotePush:
+ cleanup_described_thing;
fprintf(fp, ".RS\n");
break;
case para_LcontPop:
case para_QuotePop:
+ cleanup_described_thing;
fprintf(fp, ".RE\n");
break;
}
+ cleanup_described_thing;
/*
* Tidy up.
}
/*
- * Convert a wide string into a string of chars. If `result' is
- * non-NULL, mallocs the resulting string and stores a pointer to
- * it in `*result'. If `result' is NULL, merely checks whether all
- * characters in the string are feasible for the output character
- * set.
+ * Convert a wide string into a string of chars in `charset';
+ * mallocs the resulting string and stores a pointer to it in
+ * `*result'. `*result' is always written, so `result' must not
+ * be NULL, and the callers in this file sfree the string when
+ * they are done with it. If `maxlen' is positive, at most that
+ * many wide characters of `s' are converted; otherwise the whole
+ * string is.
+ *
+ * If `state' is non-NULL, updates the charset state pointed to. If
+ * `state' is NULL, this function uses its own state, initialises
+ * it from scratch, and cleans it up when finished. If `state' is
+ * non-NULL but `s' is NULL, no text is converted and the provided
+ * state is cleaned up; any output that the clean-up produces is
+ * returned in `*result' (an empty string if there is none).
*
* Return is nonzero if all characters are OK. If not all
* characters are OK but `result' is non-NULL, a result _will_
* still be generated!
*
- * FIXME: Here is probably also a good place to do escaping sorts
- * of things. I know I at least need to escape backslash, and full
- * stops at the starts of words are probably trouble as well.
+ * This function also does escaping of groff special characters:
+ * backslashes are always doubled, double quotes are doubled when
+ * QUOTE_QUOTES is set in `quote_props', and a `.' or `'' as the
+ * very first output character gets `\&' put in front of it when
+ * QUOTE_INITCTRL is set.
*/
-static int man_convert(wchar_t *s, int maxlen,
- char **result, int quote_props) {
- /*
- * FIXME. Currently this is ISO8859-1 only.
- */
- int doing = (result != 0);
- int ok = TRUE;
- char *p = NULL;
+static int man_convert(wchar_t const *s, int maxlen,
+ char **result, int quote_props,
+ int charset, charset_state *state) {
+ charset_state internal_state = CHARSET_INIT_STATE;
+ int slen, err;
+ char *p = NULL, *q;
int plen = 0, psize = 0;
+ rdstringc out = {0, 0, NULL};
- if (maxlen <= 0)
- maxlen = -1;
-
- for (; *s && maxlen != 0; s++, maxlen--) {
- wchar_t c = *s;
- char outc;
-
- if ((c >= 32 && c <= 126) ||
- (c >= 160 && c <= 255)) {
- /* Char is OK. */
- outc = (char)c;
- } else {
- /* Char is not OK. */
- ok = FALSE;
- outc = 0xBF; /* approximate the good old DEC `uh?' */
- }
- if (doing) {
- if (plen+3 >= psize) {
+ if (!state)
+ state = &internal_state;
+
+ slen = (s ? ustrlen(s) : 0);
+
+ if (slen > maxlen && maxlen > 0)
+ slen = maxlen;
+
+ psize = 384;
+ plen = 0;
+ p = snewn(psize, char);
+ err = 0;
+
+ while (slen > 0) {
+ int ret = charset_from_unicode(&s, &slen, p+plen, psize-plen,
+ charset, state, (err ? NULL : &err));
+ if (ret > 0) {
+ plen += ret;
+ if (psize - plen < 256) {
psize = plen + 256;
- p = resize(p, psize);
- }
- if (plen == 0 && (outc == '.' || outc == '\'') &&
- (quote_props & QUOTE_INITCTRL)) {
- /*
- * Control character (. or ') at the start of a
- * line. Quote it by putting \& (troff zero-width
- * space) before it.
- */
- p[plen++] = '\\';
- p[plen++] = '&';
- } else if (outc == '\\') {
- /*
- * Quote backslashes by doubling them, always.
- */
- p[plen++] = '\\';
- } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) {
- /*
- * Double quote within double quotes. Quote it by
- * doubling.
- */
- p[plen++] = '"';
+ p = sresize(p, psize, char);
}
- p[plen++] = outc;
}
}
- if (doing) {
- p = resize(p, plen+1);
- p[plen] = '\0';
- *result = p;
+
+ if (state == &internal_state || s == NULL) {
+ int ret = charset_from_unicode(NULL, 0, p+plen, psize-plen,
+ charset, state, NULL);
+ if (ret > 0)
+ plen += ret;
}
- return ok;
+
+ for (q = p; q < p+plen; q++) {
+ if (q == p && (*q == '.' || *q == '\'') &&
+ (quote_props & QUOTE_INITCTRL)) {
+ /*
+ * Control character (. or ') at the start of a
+ * line. Quote it by putting \& (troff zero-width
+ * space) before it.
+ */
+ rdaddc(&out, '\\');
+ rdaddc(&out, '&');
+ } else if (*q == '\\') {
+ /*
+ * Quote backslashes by doubling them, always.
+ */
+ rdaddc(&out, '\\');
+ } else if (*q == '"' && (quote_props & QUOTE_QUOTES)) {
+ /*
+ * Double quote within double quotes. Quote it by
+ * doubling.
+ */
+ rdaddc(&out, '"');
+ }
+ rdaddc(&out, *q);
+ }
+
+ sfree(p);
+
+ if (out.text)
+ *result = rdtrimc(&out);
+ else
+ *result = dupstr("");
+
+ return !err;
}
/*
 * Append the troff rendering of the word list starting at `text'
 * (stopping at `end', or at the end of the list if `end' is NULL)
 * to the string being built in `rs'.
 *
 * `state' carries the charset conversion state from one
 * man_convert() call to the next. Before each font-change escape
 * (\fI, \fB, \fP) the state is cleaned up with
 * man_convert(NULL, ...) and then reset to charset_init_state.
 *
 * A word_Normal word is first converted using a copy of the state.
 * If that conversion reports unconvertible characters and the word
 * has an `alt' word list, the alternative is rendered instead and
 * the copied state is discarded.
 *
 * Returns the updated `quote_props': QUOTE_INITCTRL is cleared as
 * soon as a man_convert() call has produced non-empty output.
 */
-static void man_rdaddwc(rdstringc *rs, word *text, word *end,
- int quote_props) {
+static int man_rdaddwc(rdstringc *rs, word *text, word *end,
+ int quote_props, manconfig *conf,
+ charset_state *state) {
char *c;
for (; text && text != end; text = text->next) switch (text->type) {
case word_WkCodeQuote:
assert(text->type != word_CodeQuote &&
text->type != word_WkCodeQuote);
+
if (towordstyle(text->type) == word_Emph &&
(attraux(text->aux) == attr_First ||
- attraux(text->aux) == attr_Only))
+ attraux(text->aux) == attr_Only)) {
+ man_convert(NULL, 0, &c, quote_props, conf->charset, state);
+ rdaddsc(rs, c);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ sfree(c);
+ *state = charset_init_state;
rdaddsc(rs, "\\fI");
- else if ((towordstyle(text->type) == word_Code ||
- towordstyle(text->type) == word_WeakCode) &&
- (attraux(text->aux) == attr_First ||
- attraux(text->aux) == attr_Only))
+ } else if ((towordstyle(text->type) == word_Code ||
+ towordstyle(text->type) == word_WeakCode) &&
+ (attraux(text->aux) == attr_First ||
+ attraux(text->aux) == attr_Only)) {
+ man_convert(NULL, 0, &c, quote_props, conf->charset, state);
+ rdaddsc(rs, c);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ sfree(c);
+ *state = charset_init_state;
rdaddsc(rs, "\\fB");
+ }
+
if (removeattr(text->type) == word_Normal) {
- if (rs->pos > 0)
- quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
- if (man_convert(text->text, 0, &c, quote_props))
+ charset_state s2 = *state;
+
+ if (man_convert(text->text, 0, &c, quote_props, conf->charset, &s2) ||
+ !text->alt) {
rdaddsc(rs, c);
- else
- man_rdaddwc(rs, text->alt, NULL, quote_props);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ *state = s2;
+ } else {
+ quote_props = man_rdaddwc(rs, text->alt, NULL,
+ quote_props, conf, state);
+ }
sfree(c);
} else if (removeattr(text->type) == word_WhiteSpace) {
- rdaddc(rs, ' ');
+ man_convert(L" ", 1, &c, quote_props, conf->charset, state);
+ rdaddsc(rs, c);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ sfree(c);
} else if (removeattr(text->type) == word_Quote) {
- rdaddc(rs, '"');
- if (quote_props & QUOTE_QUOTES)
- rdaddc(rs, '"');
+ man_convert(quoteaux(text->aux) == quote_Open ?
+ conf->lquote : conf->rquote, 0,
+ &c, quote_props, conf->charset, state);
+ rdaddsc(rs, c);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ sfree(c);
}
- if (towordstyle(text->type) == word_Emph &&
+ if (towordstyle(text->type) != word_Normal &&
(attraux(text->aux) == attr_Last ||
- attraux(text->aux) == attr_Only))
- rdaddsc(rs, "\\fP");
- else if ((towordstyle(text->type) == word_Code ||
- towordstyle(text->type) == word_WeakCode) &&
- (attraux(text->aux) == attr_Last ||
- attraux(text->aux) == attr_Only))
+ attraux(text->aux) == attr_Only)) {
+ man_convert(NULL, 0, &c, quote_props, conf->charset, state);
+ rdaddsc(rs, c);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ sfree(c);
+ *state = charset_init_state;
rdaddsc(rs, "\\fP");
+ }
break;
}
+ man_convert(NULL, 0, &c, quote_props, conf->charset, state);
+ rdaddsc(rs, c);
+ if (*c)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ sfree(c);
+
+ return quote_props;
}
/*
 * Render the word list `text' with man_rdaddwc() and write the
 * result to `fp', followed by a newline if `newline' is nonzero.
 *
 * Rendering starts with a fresh charset state and with
 * QUOTE_INITCTRL added to `quote_props', i.e. on the assumption
 * that the text begins at the start of an output line.
 */
-static void man_text(FILE *fp, word *text, int newline, int quote_props) {
+static void man_text(FILE *fp, word *text, int newline,
+ int quote_props, manconfig *conf) {
rdstringc t = { 0, 0, NULL };
+ charset_state state = CHARSET_INIT_STATE;
- man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL);
+ man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL, conf, &state);
fprintf(fp, "%s", t.text);
sfree(t.text);
if (newline)
fputc('\n', fp);
}
/*
 * Write a code paragraph to `fp'. Output begins with a `.nf'
 * request; the loop then handles the words of type word_WeakCode,
 * passing their text through man_convert() in `charset' and
 * printing \fI or \fB in front of some pieces.
 *
 * NOTE(review): parts of this function are not visible here
 * (including how `t', `e', `n' and `ec' are set up), so the rule
 * that picks italic or bold should be checked against the full
 * source.
 */
-static void man_codepara(FILE *fp, word *text) {
+static void man_codepara(FILE *fp, word *text, int charset) {
fprintf(fp, ".nf\n");
for (; text; text = text->next) if (text->type == word_WeakCode) {
char *c;
fprintf(fp, "\\fI");
else if (ec == 'b')
fprintf(fp, "\\fB");
- man_convert(t, n, &c, quote_props);
+ man_convert(t, n, &c, quote_props, charset, NULL);
quote_props &= ~QUOTE_INITCTRL;
fprintf(fp, "%s", c);
sfree(c);
t += n;
e += n;
}
- man_convert(t, 0, &c, quote_props);
+ man_convert(t, 0, &c, quote_props, charset, NULL);
fprintf(fp, "%s\n", c);
sfree(c);
}