From 7136a6c7f094fa423c48ec319748c4fd7e1fa645 Mon Sep 17 00:00:00 2001
From: simon
Date: Tue, 23 Mar 2004 20:10:23 +0000
Subject: [PATCH] Man-page back end for Halibut. Also, a couple of additional
markup features commonly used in man pages: (a) the ability to nest paragraph
breaks, code paragraphs and other lists inside list items, and (b)
description lists as normally used in man pages to describe command-line
options.
git-svn-id: svn://svn.tartarus.org/sgt/halibut@3954 cda61777-01e9-0310-a592-d414129be87e
---
Makefile | 2 +-
bk_man.c | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bk_text.c | 30 +++++-
bk_whlp.c | 17 ++-
bk_xhtml.c | 71 +++++++++---
error.c | 15 +++
halibut.h | 13 +++
input.c | 136 +++++++++++++++++++++--
inputs/test.but | 68 +++++++++++-
main.c | 1 +
misc.c | 7 ++
11 files changed, 659 insertions(+), 31 deletions(-)
create mode 100644 bk_man.c
diff --git a/Makefile b/Makefile
index 35d2eef..bfd14a5 100644
--- a/Makefile
+++ b/Makefile
@@ -58,7 +58,7 @@ SRC := ../
MODULES := main malloc ustring error help licence version misc tree234
MODULES += input keywords contents index style biblio
-MODULES += bk_text bk_xhtml bk_whlp
+MODULES += bk_text bk_xhtml bk_whlp bk_man
MODULES += winhelp
OBJECTS := $(addsuffix .o,$(MODULES))
diff --git a/bk_man.c b/bk_man.c
new file mode 100644
index 0000000..02d942a
--- /dev/null
+++ b/bk_man.c
@@ -0,0 +1,330 @@
+/*
+ * man page backend for Halibut
+ */
+
+#include
+#include
+#include
+#include "halibut.h"
+
+static void man_text(FILE *, word *, int newline, int quote_props);
+static void man_codepara(FILE *, word *);
+
+#define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */
+#define QUOTE_QUOTES 2 /* quote double quotes by doubling them */
+
+void man_backend(paragraph *sourceform, keywordlist *keywords,
+ indexdata *idx) { /* writes the document as troff man-page source to output.1 */
+ paragraph *p;
+ FILE *fp;
+ char const *sep; /* printed before each preamble/copyright paragraph; empty for the first */
+
+ IGNORE(keywords); /* we don't happen to need this */
+ IGNORE(idx); /* or this */
+
+ /*
+ * Determine the output file name, and open the output file
+ *
+ * FIXME: want configurable output file names here. For the
+ * moment, we'll just call it `output.1'.
+ */
+ fp = fopen("output.1", "w");
+ if (!fp) {
+ error(err_cantopenw, "output.1");
+ return;
+ }
+
+ /* Do the version ID */
+ for (p = sourceform; p; p = p->next)
+ if (p->type == para_VersionID) {
+ fprintf(fp, ".\\\" "); /* troff comment introducer, so the ID is not typeset */
+ man_text(fp, p->words, TRUE, 0);
+ }
+
+ /* FIXME: .TH name-of-program manual-section */
+ fprintf(fp, ".TH FIXME 1\n"); /* placeholder title line: name FIXME, section 1 */
+
+ fprintf(fp, ".UC\n");
+
+ /* Do the preamble and copyright */
+ sep = "";
+ for (p = sourceform; p; p = p->next)
+ if (p->type == para_Preamble) {
+ fprintf(fp, "%s", sep);
+ man_text(fp, p->words, TRUE, 0);
+ sep = "\n";
+ }
+ for (p = sourceform; p; p = p->next)
+ if (p->type == para_Copyright) {
+ fprintf(fp, "%s", sep);
+ man_text(fp, p->words, TRUE, 0);
+ sep = "\n";
+ }
+
+ /*
+ * FIXME:
+ *
+ * - figure out precisely what needs to be escaped.
+ * * A dot or apostrophe at the start of a line wants to be
+ * preceded by `\&', which is a zero-width space.
+ * * Literal backslashes always want doubling.
+ * * Within double quotes, a double quote needs doubling
+ * too.
+ *
+ * - work out what to do about hyphens / minuses...
+ */
+ for (p = sourceform; p; p = p->next) switch (p->type) { /* NOTE(review): no default case; unlisted types (e.g. para_Config) emit nothing */
+ /*
+ * Things we ignore because we've already processed them or
+ * aren't going to touch them in this pass.
+ */
+ case para_IM:
+ case para_BR:
+ case para_Biblio: /* only touch BiblioCited */
+ case para_VersionID:
+ case para_Copyright:
+ case para_Preamble:
+ case para_NoCite:
+ case para_Title:
+ break;
+
+ /*
+ * Headings.
+ */
+ case para_Chapter:
+ case para_Appendix:
+ case para_UnnumberedChapter:
+ case para_Heading:
+ case para_Subsect:
+ fprintf(fp, ".SH \""); /* every heading level is emitted as a .SH line */
+ /* FIXME: disable this, at _least_ by default */
+ if (p->kwtext)
+ man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
+ fprintf(fp, " "); /* NOTE(review): unconditional, so a heading without kwtext starts with a space */
+ man_text(fp, p->words, FALSE, QUOTE_QUOTES);
+ fprintf(fp, "\"\n");
+ break;
+
+ /*
+ * Code paragraphs.
+ */
+ case para_Code:
+ fprintf(fp, ".PP\n");
+ man_codepara(fp, p->words);
+ break;
+
+ /*
+ * Normal paragraphs.
+ */
+ case para_Normal:
+ fprintf(fp, ".PP\n");
+ man_text(fp, p->words, TRUE, 0);
+ break;
+
+ /*
+ * List paragraphs.
+ */
+ case para_Description:
+ case para_BiblioCited:
+ case para_Bullet:
+ case para_NumberedList:
+ if (p->type == para_Bullet) {
+ fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */
+ } else if (p->type == para_NumberedList) {
+ fprintf(fp, ".IP \"");
+ man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES); /* kwtext supplies the .IP tag */
+ fprintf(fp, "\"\n");
+ } else if (p->type == para_Description) {
+ /*
+ * Do nothing; the .xP for this paragraph is the .IP
+ * which has come before it in the DescribedThing.
+ */
+ } else if (p->type == para_BiblioCited) {
+ fprintf(fp, ".IP \"");
+ man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES);
+ fprintf(fp, "\"\n");
+ }
+ man_text(fp, p->words, TRUE, 0); /* body text, common to all four list types */
+ break;
+
+ case para_DescribedThing:
+ fprintf(fp, ".IP \""); /* the described thing itself becomes the .IP tag */
+ man_text(fp, p->words, FALSE, QUOTE_QUOTES);
+ fprintf(fp, "\"\n");
+ break;
+
+ case para_Rule:
+ /*
+ * FIXME: rules currently produce no output at all.
+ */
+ break;
+
+ case para_LcontPush:
+ fprintf(fp, ".RS\n"); /* list continuation opens: emit .RS */
+ break;
+ case para_LcontPop:
+ fprintf(fp, ".RE\n"); /* list continuation closes: emit the matching .RE */
+ break;
+ }
+
+ /*
+ * Tidy up.
+ */
+ fclose(fp); /* NOTE(review): return value unchecked, so a failed final flush goes unreported */
+}
+
+/*
+ * Convert a wide string into a string of chars. If `result' is
+ * non-NULL, allocates the resulting string and stores a pointer to
+ * it in `*result'. If `result' is NULL, merely checks whether all
+ * characters in the string are feasible for the output character
+ * set.
+ *
+ * Return is nonzero if all characters are OK. If not all
+ * characters are OK but `result' is non-NULL, a result _will_
+ * still be generated!
+ *
+ * Escaping is applied while converting: backslashes are always
+ * doubled; `quote_props' may add QUOTE_INITCTRL (prefix a leading
+ * . or ' with \&) and QUOTE_QUOTES (double any double quote).
+ */
+static int man_convert(wchar_t *s, char **result, int quote_props) {
+ /*
+ * FIXME. Currently this is ISO8859-1 only.
+ */
+ int doing = (result != 0); /* nonzero when output is wanted, not just a check */
+ int ok = TRUE;
+ char *p = NULL;
+ int plen = 0, psize = 0; /* bytes used in p / bytes allocated for p */
+
+ for (; *s; s++) {
+ wchar_t c = *s;
+ char outc;
+
+ if ((c >= 32 && c <= 126) ||
+ (c >= 160 && c <= 255)) {
+ /* Char is OK. */
+ outc = (char)c;
+ } else {
+ /* Char is not OK. */
+ ok = FALSE;
+ outc = 0xBF; /* approximate the good old DEC `uh?' */
+ }
+ if (doing) {
+ if (plen+3 >= psize) { /* at most 3 bytes are appended per input char */
+ psize = plen + 256;
+ p = resize(p, psize);
+ }
+ if (plen == 0 && (outc == '.' || outc == '\'') &&
+ (quote_props & QUOTE_INITCTRL)) {
+ /*
+ * Control character (. or ') at the start of this
+ * string (plen == 0), which callers treat as start of
+ * line. Quote it with \& (troff zero-width space).
+ */
+ p[plen++] = '\\';
+ p[plen++] = '&';
+ } else if (outc == '\\') {
+ /*
+ * Quote backslashes by doubling them, always.
+ */
+ p[plen++] = '\\';
+ } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) {
+ /*
+ * Double quote within double quotes. Quote it by
+ * doubling.
+ */
+ p[plen++] = '"';
+ }
+ p[plen++] = outc; /* the character itself, after any escape prefix */
+ }
+ }
+ if (doing) {
+ p = resize(p, plen+1); /* trim to fit, leaving room for the terminator */
+ p[plen] = '\0';
+ *result = p;
+ }
+ return ok;
+}
+
+static void man_rdaddwc(rdstringc *rs, word *text, word *end,
+ int quote_props) { /* appends the words from text up to (not including) end onto rs */
+ char *c;
+
+ for (; text && text != end; text = text->next) switch (text->type) {
+ case word_HyperLink:
+ case word_HyperEnd:
+ case word_UpperXref:
+ case word_LowerXref:
+ case word_XrefEnd:
+ case word_IndexRef:
+ break; /* these word types produce no output here */
+
+ case word_Normal:
+ case word_Emph:
+ case word_Code:
+ case word_WeakCode:
+ case word_WhiteSpace:
+ case word_EmphSpace:
+ case word_CodeSpace:
+ case word_WkCodeSpace:
+ case word_Quote:
+ case word_EmphQuote:
+ case word_CodeQuote:
+ case word_WkCodeQuote:
+ assert(text->type != word_CodeQuote &&
+ text->type != word_WkCodeQuote); /* code-styled quote words must never reach this function */
+ if (towordstyle(text->type) == word_Emph &&
+ (attraux(text->aux) == attr_First ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "\\fI"); /* emphasis run begins: switch to font I */
+ else if (towordstyle(text->type) == word_Code &&
+ (attraux(text->aux) == attr_First ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "\\fB"); /* code run begins: switch to font B */
+ if (removeattr(text->type) == word_Normal) {
+ if (rs->pos > 0)
+ quote_props &= ~QUOTE_INITCTRL; /* not at start any more */
+ if (man_convert(text->text, &c, quote_props))
+ rdaddsc(rs, c);
+ else
+ man_rdaddwc(rs, text->alt, NULL, quote_props); /* conversion failed: render the alternative text instead */
+ sfree(c); /* c is allocated even when man_convert reports failure */
+ } else if (removeattr(text->type) == word_WhiteSpace) {
+ rdaddc(rs, ' ');
+ } else if (removeattr(text->type) == word_Quote) {
+ rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
+ /* FIXME: configurability */
+ }
+ if (towordstyle(text->type) == word_Emph &&
+ (attraux(text->aux) == attr_Last ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "\\fP"); /* emphasis run ends: back to the previous font */
+ else if (towordstyle(text->type) == word_Code &&
+ (attraux(text->aux) == attr_Last ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "\\fP"); /* code run ends: back to the previous font */
+ break;
+ }
+}
+
+static void man_text(FILE *fp, word *text, int newline, int quote_props) {
+    rdstringc t = { 0, 0, NULL }; /* buffer that man_rdaddwc renders `text' into */
+    /* Write `text' to fp as troff text; QUOTE_INITCTRL is always added to quote_props. */
+    man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL);
+    fprintf(fp, "%s", t.text ? t.text : ""); /* t.text stays NULL if nothing was appended; NULL for %s is undefined */
+    sfree(t.text);
+    if (newline)
+        fputc('\n', fp); /* caller asked for the output line to be terminated */
+}
+
+static void man_codepara(FILE *fp, word *text) {
+ fprintf(fp, ".nf\n"); /* code lines are emitted between a .nf / .fi pair */
+ for (; text; text = text->next) if (text->type == word_WeakCode) { /* words of any other type are skipped */
+ char *c;
+ man_convert(text->text, &c, QUOTE_INITCTRL); /* each word is its own output line, so a leading . or ' is quoted */
+ fprintf(fp, "%s\n", c);
+ sfree(c);
+ }
+ fprintf(fp, ".fi\n");
+}
diff --git a/bk_text.c b/bk_text.c
index 48e99f4..989cdbf 100644
--- a/bk_text.c
+++ b/bk_text.c
@@ -182,6 +182,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
word spaceword;
FILE *fp;
char *prefixextra;
+ int nesting, nestindent;
int indentb, indenta;
IGNORE(keywords); /* we don't happen to need this */
@@ -219,9 +220,20 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
conf.indent_preambles ? conf.indent : 0, 0,
conf.width + (conf.indent_preambles ? 0 : conf.indent));
+ nestindent = conf.listindentbefore + conf.listindentafter;
+ nesting = 0;
+
/* Do the main document */
for (p = sourceform; p; p = p->next) switch (p->type) {
+ case para_LcontPush:
+ nesting++;
+ break;
+ case para_LcontPop:
+ assert(nesting > 0);
+ nesting--;
+ break;
+
/*
* Things we ignore because we've already processed them or
* aren't going to touch them in this pass.
@@ -254,10 +266,13 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
break;
case para_Rule:
- text_rule(fp, conf.indent, conf.width);
+ text_rule(fp, conf.indent + nestindent*nesting,
+ conf.width - nestindent*nesting);
break;
case para_Normal:
+ case para_DescribedThing:
+ case para_Description:
case para_BiblioCited:
case para_Bullet:
case para_NumberedList:
@@ -271,6 +286,11 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
prefixextra = "."; /* FIXME: configurability */
indentb = conf.listindentbefore;
indenta = conf.listindentafter;
+ } else if (p->type == para_Description) {
+ prefix = NULL;
+ prefixextra = NULL;
+ indentb = conf.listindentbefore;
+ indenta = conf.listindentafter;
} else {
prefix = NULL;
prefixextra = NULL;
@@ -289,8 +309,8 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
body = p->words;
}
text_para(fp, prefix, prefixextra, body,
- conf.indent + indentb, indenta,
- conf.width - indentb - indenta);
+ conf.indent + nestindent*nesting + indentb, indenta,
+ conf.width - nestindent*nesting - indentb - indenta);
if (wp) {
wp->next = NULL;
free_word_list(body);
@@ -298,7 +318,9 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
break;
case para_Code:
- text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code);
+ text_codepara(fp, p->words,
+ conf.indent + nestindent*nesting + conf.indent_code,
+ conf.width - nestindent*nesting - 2 * conf.indent_code);
break;
}
diff --git a/bk_whlp.c b/bk_whlp.c
index f4b1b2d..a21ac04 100644
--- a/bk_whlp.c
+++ b/bk_whlp.c
@@ -50,6 +50,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
struct bk_whlp_state state;
WHLP_TOPIC contents_topic;
int i;
+ int nesting;
indexentry *ie;
filename = "output.hlp"; /* FIXME: configurability */
@@ -216,6 +217,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
* Now we've done the contents page, we're ready to go through
* and do the main manual text. Ooh.
*/
+ nesting = 0;
for (p = sourceform; p; p = p->next) switch (p->type) {
/*
* Things we ignore because we've already processed them or
@@ -231,6 +233,14 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
case para_Title:
break;
+ case para_LcontPush:
+ nesting++;
+ break;
+ case para_LcontPop:
+ assert(nesting > 0);
+ nesting--;
+ break;
+
/*
* Chapter and section titles: start a new Help topic.
*/
@@ -343,12 +353,14 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
break;
case para_Normal:
+ case para_DescribedThing:
+ case para_Description:
case para_BiblioCited:
case para_Bullet:
case para_NumberedList:
whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
if (p->type == para_Bullet || p->type == para_NumberedList) {
- whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72);
+ whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting + 72);
whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36);
whlp_set_tabstop(h, 72, WHLP_ALIGN_LEFT);
whlp_begin_para(h, WHLP_PARA_SCROLL);
@@ -361,6 +373,8 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
}
whlp_tab(h);
} else {
+ whlp_para_attr(h, WHLP_PARA_LEFTINDENT,
+ 72*nesting + (p->type==para_Description ? 72 : 0));
whlp_begin_para(h, WHLP_PARA_SCROLL);
}
@@ -386,6 +400,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
for (w = p->words; w; w = w->next) {
if (!w->next)
whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+ whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting);
whlp_begin_para(h, WHLP_PARA_SCROLL);
whlp_set_font(h, FONT_CODE);
whlp_convert(w->text, &c, FALSE);
diff --git a/bk_xhtml.c b/bk_xhtml.c
index 140e2dd..053bf33 100644
--- a/bk_xhtml.c
+++ b/bk_xhtml.c
@@ -1076,14 +1076,15 @@ static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
/* Write this list of paragraphs. Close off all lists at the end. */
static void xhtml_do_paras(FILE *fp, paragraph *p)
{
- int last_type = -1, first=TRUE;
+ int last_type = -1, ptype, first=TRUE;
+ stack lcont_stack = stk_new();
if (!p)
return;
/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) {
first=FALSE;
- switch (p->type)
+ switch (ptype = p->type)
{
/*
* Things we ignore because we've already processed them or
@@ -1123,8 +1124,29 @@ static void xhtml_do_paras(FILE *fp, paragraph *p)
fprintf(fp, "
\n");
break;
+ case para_LcontPush:
+ {
+ int *p;
+ p = mknew(int);
+ *p = last_type;
+ stk_push(lcont_stack, p);
+ last_type = para_Normal;
+ }
+ break;
+ case para_LcontPop:
+ {
+ int *p = stk_pop(lcont_stack);
+ assert(p);
+ ptype = last_type = *p;
+ sfree(p);
+ goto closeofflist; /* ick */
+ }
+ break;
+
case para_Bullet:
case para_NumberedList:
+ case para_Description:
+ case para_DescribedThing:
case para_BiblioCited:
if (last_type!=p->type) {
/* start up list if necessary */
@@ -1132,41 +1154,62 @@ static void xhtml_do_paras(FILE *fp, paragraph *p)
fprintf(fp, "\n");
} else if (p->type == para_NumberedList) {
fprintf(fp, "\n");
- } else if (p->type == para_BiblioCited) {
+ } else if (p->type == para_BiblioCited ||
+ p->type == para_DescribedThing ||
+ p->type == para_Description) {
fprintf(fp, "\n");
}
}
- if (p->type == para_Bullet || p->type == para_NumberedList)
+ if (p->type == para_Bullet || p->type == para_NumberedList) {
fprintf(fp, "
- ");
- else if (p->type == para_BiblioCited) {
+ } else if (p->type == para_DescribedThing) {
+ fprintf(fp, "
- ");
+ } else if (p->type == para_Description) {
+ fprintf(fp, "
- ");
+ } else if (p->type == para_BiblioCited) {
fprintf(fp, "
- ");
xhtml_para(fp, p->kwtext);
fprintf(fp, "
\n- ");
}
xhtml_para(fp, p->words);
- if (p->type == para_BiblioCited) {
+ {
+ paragraph *p2 = p->next;
+ if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
+ break;
+ }
+
+ closeofflist:
+ if (ptype == para_BiblioCited) {
fprintf(fp, "
\n");
- } else if (p->type == para_Bullet || p->type == para_NumberedList) {
+ } else if (p->type == para_DescribedThing) {
+ fprintf(fp, "");
+ } else if (p->type == para_Description) {
+ fprintf(fp, "");
+ } else if (ptype == para_Bullet || ptype == para_NumberedList) {
fprintf(fp, " ");
}
- if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited)
+ if (ptype == para_Bullet || ptype == para_NumberedList ||
+ ptype == para_BiblioCited || ptype == para_Description ||
+ ptype == para_DescribedThing)
/* close off list if necessary */
{
paragraph *p2 = p->next;
int close_off=FALSE;
/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
if (p2 && xhtml_para_level(p2)==-1) {
- if (p2->type != p->type)
+ if (p2->type != ptype && p2->type != para_LcontPush)
close_off=TRUE;
} else {
close_off=TRUE;
}
if (close_off) {
- if (p->type == para_Bullet) {
+ if (ptype == para_Bullet) {
fprintf(fp, "
\n");
- } else if (p->type == para_NumberedList) {
+ } else if (ptype == para_NumberedList) {
fprintf(fp, "\n");
- } else if (p->type == para_BiblioCited) {
+ } else if (ptype == para_BiblioCited ||
+ ptype == para_Description ||
+ ptype == para_DescribedThing) {
fprintf(fp, "\n");
}
}
@@ -1177,8 +1220,10 @@ static void xhtml_do_paras(FILE *fp, paragraph *p)
xhtml_codepara(fp, p->words);
break;
}
- last_type = p->type;
+ last_type = ptype;
}
+
+ stk_free(lcont_stack);
}
/*
diff --git a/error.c b/error.c
index 6d8dd13..e9c681a 100644
--- a/error.c
+++ b/error.c
@@ -119,6 +119,11 @@ static void do_error(int code, va_list ap) {
sprintf(error, "unclosed braces at end of paragraph");
flags = FILEPOS;
break;
+ case err_missingrbrace2:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "unclosed braces at end of input file");
+ flags = FILEPOS;
+ break;
case err_nestedstyles:
fpos = *va_arg(ap, filepos *);
sprintf(error, "unable to nest text styles");
@@ -184,6 +189,16 @@ static void do_error(int code, va_list ap) {
sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line);
flags = FILEPOS;
break;
+ case err_misplacedlcont:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "\\lcont is only expected after a list item");
+ flags = FILEPOS;
+ break;
+ case err_sectmarkerinlcont:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "section headings are not supported within \\lcont");
+ flags = FILEPOS;
+ break;
case err_whatever:
sp = va_arg(ap, char *);
vsprintf(error, sp, ap);
diff --git a/halibut.h b/halibut.h
index c948624..6b3d383 100644
--- a/halibut.h
+++ b/halibut.h
@@ -101,6 +101,8 @@ enum {
para_BiblioCited, /* ... into this paragraph type */
para_Bullet,
para_NumberedList,
+ para_DescribedThing,
+ para_Description,
para_Code,
para_Copyright,
para_Preamble,
@@ -108,6 +110,8 @@ enum {
para_Title,
para_VersionID,
para_Config, /* configuration directive */
+ para_LcontPush, /* begin continuation of list item */
+ para_LcontPop, /* end continuation of list item */
para_NotParaType /* placeholder value */
};
@@ -197,6 +201,7 @@ enum {
err_commenteof, /* EOF inside braced comment */
err_kwexprbr, /* expected `}' after cross-ref */
err_missingrbrace, /* unclosed braces at end of para */
+ err_missingrbrace2, /* unclosed braces at end of file */
err_nestedstyles, /* unable to nest text styles */
err_nestedindex, /* unable to nest `\i' thingys */
err_nosuchkw, /* unresolved cross-reference */
@@ -207,6 +212,8 @@ enum {
err_sectjump, /* jump a heading level, eg \C -> \S */
err_winhelp_ctxclash, /* WinHelp context ID hash clash */
err_multikw, /* keyword clash in sections */
+ err_misplacedlcont, /* \lcont not after a list item */
+ err_sectmarkerinlcont, /* section marker appeared in \lcont */
err_whatever /* random error of another type */
};
@@ -277,6 +284,7 @@ stack stk_new(void);
void stk_free(stack);
void stk_push(stack, void *);
void *stk_pop(stack);
+void *stk_top(stack);
typedef struct tagRdstring rdstring;
struct tagRdstring {
@@ -413,4 +421,9 @@ void xhtml_backend(paragraph *, keywordlist *, indexdata *);
*/
void whlp_backend(paragraph *, keywordlist *, indexdata *);
+/*
+ * bk_man.c
+ */
+void man_backend(paragraph *, keywordlist *, indexdata *);
+
#endif
diff --git a/input.c b/input.c
index d4ecaeb..6cad0f7 100644
--- a/input.c
+++ b/input.c
@@ -180,11 +180,14 @@ enum {
c_copyright, /* copyright statement */
c_cw, /* weak code */
c_date, /* document processing date */
+ c_dd, /* description list: description */
c_define, /* macro definition */
+ c_dt, /* description list: described thing */
c_e, /* emphasis */
c_i, /* visible index mark */
c_ii, /* uncapitalised visible index mark */
c_k, /* uncapitalised cross-reference */
+ c_lcont, /* continuation para(s) for list item */
c_n, /* numbered list */
c_nocite, /* bibliography trickery */
c_preamble, /* document preamble text */
@@ -245,11 +248,14 @@ static void match_kw(token *tok) {
{"copyright", c_copyright}, /* copyright statement */
{"cw", c_cw}, /* weak code */
{"date", c_date}, /* document processing date */
+ {"dd", c_dd}, /* description list: description */
{"define", c_define}, /* macro definition */
+ {"dt", c_dt}, /* description list: described thing */
{"e", c_e}, /* emphasis */
{"i", c_i}, /* visible index mark */
{"ii", c_ii}, /* uncapitalised visible index mark */
{"k", c_k}, /* uncapitalised cross-reference */
+ {"lcont", c_lcont}, /* continuation para(s) for list item */
{"n", c_n}, /* numbered list */
{"nocite", c_nocite}, /* bibliography trickery */
{"preamble", c_preamble}, /* document preamble text */
@@ -506,6 +512,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
int already;
int iswhite, seenwhite;
int type;
+ int prev_para_type;
struct stack_item {
enum {
stack_nop = 0, /* do nothing (for error recovery) */
@@ -519,6 +526,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
word **idximplicit; /* to restore from \u alternatives */
} *sitem;
stack parsestk;
+ struct crossparaitem {
+ int type; /* currently c_lcont or -1 */
+ int seen_lcont;
+ };
+ stack crossparastk;
word *indexword, *uword, *iword;
word *idxwordlist;
rdstring indexstr;
@@ -530,6 +542,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
macros = newtree234(macrocmp);
already = FALSE;
+ crossparastk = stk_new();
+
/*
* Loop on each paragraph.
*/
@@ -542,10 +556,12 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
/*
* Get a token.
*/
- if (!already) {
- dtor(t), t = get_token(in);
- }
- already = FALSE;
+ do {
+ if (!already) {
+ dtor(t), t = get_token(in);
+ }
+ already = FALSE;
+ } while (t.type == tok_eop);
if (t.type == tok_eof)
break;
@@ -574,18 +590,83 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
break;
else if (t.type != tok_cmd || t.cmd != c_c) {
error(err_brokencodepara, &t.pos);
+ prev_para_type = par.type;
addpara(par, ret);
while (t.type != tok_eop) /* error recovery: */
dtor(t), t = get_token(in); /* eat rest of paragraph */
goto codeparabroken; /* ick, but such is life */
}
}
+ prev_para_type = par.type;
addpara(par, ret);
codeparabroken:
continue;
}
/*
+ * Spot the special commands that define a grouping of more
+ * than one paragraph, and also the closing braces that
+ * finish them.
+ */
+ if (t.type == tok_cmd &&
+ t.cmd == c_lcont) {
+ struct crossparaitem *sitem, *stop;
+
+ /*
+ * Expect, and swallow, an open brace.
+ */
+ dtor(t), t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ continue;
+ }
+
+ /*
+ * \lcont causes a continuation of a list item into
+ * multiple paragraphs (which may in turn contain
+ * nested lists, code paras etc). Hence, the previous
+ * paragraph must be of a list type.
+ */
+ sitem = mknew(struct crossparaitem);
+ stop = (struct crossparaitem *)stk_top(crossparastk);
+ if (prev_para_type == para_Bullet ||
+ prev_para_type == para_NumberedList ||
+ prev_para_type == para_Description) {
+ sitem->type = c_lcont;
+ sitem->seen_lcont = 1;
+ par.type = para_LcontPush;
+ prev_para_type = par.type;
+ addpara(par, ret);
+ } else {
+ /*
+ * Push a null item on the cross-para stack so that
+ * when we see the corresponding closing brace we
+ * don't give a cascade error.
+ */
+ sitem->type = -1;
+ sitem->seen_lcont = (stop ? stop->seen_lcont : 0);
+ error(err_misplacedlcont, &t.pos);
+ }
+ stk_push(crossparastk, sitem);
+ continue;
+ } else if (t.type == tok_rbrace) {
+ struct crossparaitem *sitem = stk_pop(crossparastk);
+ if (!sitem)
+ error(err_unexbrace, &t.pos);
+ else {
+ switch (sitem->type) {
+ case c_lcont:
+ par.type = para_LcontPop;
+ prev_para_type = par.type;
+ addpara(par, ret);
+ break;
+ }
+ sfree(sitem);
+ }
+ continue;
+ }
+
+ /*
* This token begins a paragraph. See if it's one of the
* special commands that define a paragraph type.
*
@@ -638,6 +719,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
/* For \b and \n the keyword is optional */
case c_b: needkw = 4; par.type = para_Bullet; break;
+ case c_dt: needkw = 4; par.type = para_DescribedThing; break;
+ case c_dd: needkw = 4; par.type = para_Description; break;
case c_n: needkw = 4; par.type = para_NumberedList; break;
case c_cfg: needkw = 8; par.type = para_Config;
start_cmd = c_cfg; break;
@@ -651,6 +734,17 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
case c_versionid: needkw = 32; par.type = para_VersionID; break;
}
+ if (par.type == para_Chapter ||
+ par.type == para_Heading ||
+ par.type == para_Subsect ||
+ par.type == para_Appendix ||
+ par.type == para_UnnumberedChapter) {
+ struct crossparaitem *sitem = stk_top(crossparastk);
+ if (sitem && sitem->seen_lcont) {
+ error(err_sectmarkerinlcont, &t.pos);
+ }
+ }
+
if (needkw > 0) {
rdstring rs = { 0, 0, NULL };
int nkeys = 0;
@@ -732,6 +826,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
}
if (t.type == tok_cmd)
already = TRUE;/* inhibit get_token at top of loop */
+ prev_para_type = par.type;
addpara(par, ret);
continue; /* next paragraph */
}
@@ -837,9 +932,16 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
break;
case tok_rbrace:
sitem = stk_pop(parsestk);
- if (!sitem)
- error(err_unexbrace, &t.pos);
- else {
+ if (!sitem) {
+ /*
+ * This closing brace could have been an
+ * indication that the cross-paragraph stack
+ * wants popping. Accordingly, we treat it here
+ * as an indication that the paragraph is over.
+ */
+ already = TRUE;
+ goto finished_para;
+ } else {
if (sitem->type & stack_ualt) {
whptr = sitem->whptr;
idximplicit = sitem->idximplicit;
@@ -1170,16 +1272,26 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
dtor(t), t = get_token(in);
seenwhite = iswhite;
}
+ finished_para:
/* Check the stack is empty */
- if (NULL != (sitem = stk_pop(parsestk))) {
- do {
+ if (stk_top(parsestk)) {
+ while ((sitem = stk_pop(parsestk)))
sfree(sitem);
- sitem = stk_pop(parsestk);
- } while (sitem);
error(err_missingrbrace, &t.pos);
}
stk_free(parsestk);
+ prev_para_type = par.type;
addpara(par, ret);
+ if (t.type == tok_eof)
+ already = TRUE;
+ }
+
+ if (stk_top(crossparastk)) {
+ void *p;
+
+ error(err_missingrbrace2, &t.pos);
+ while ((p = stk_pop(crossparastk)))
+ sfree(p);
}
/*
@@ -1188,6 +1300,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
*/
dtor(t);
macrocleanup(macros);
+
+ stk_free(crossparastk);
}
paragraph *read_input(input *in, indexdata *idx) {
diff --git a/inputs/test.but b/inputs/test.but
index 8ac6e86..059567e 100644
--- a/inputs/test.but
+++ b/inputs/test.but
@@ -19,7 +19,7 @@ a bit]
\define{coopt} co\u00F6{-o}pt
-\versionid $Id: test.but,v 1.19 2002/08/12 11:24:28 simon Exp $
+\versionid $Id: test.but,v 1.20 2004/03/23 20:10:23 simon Exp $
\C{ch\\ap} First chapter title; for similar wrapping reasons this
chapter title will be ludicrously long. I wonder how much more
@@ -33,6 +33,14 @@ fragments}.
\cw{This} is weak code. And \k{head} contains some other stuff.
\K{subhead} does too.
+To test the man page back end:
+
+.Directive
+
+'Directive
+
+\\Sping\\Spong\\Spoing
+
\H{head} First section title (very long again, no prizes for
guessing the reason why this time, and here's yet more text to pad
it out to three lines of output)
@@ -56,6 +64,39 @@ This is a list:
\b Aah.
+\lcont{
+
+This bulletted list contains a list continuation. This is an
+additional paragraph, or more than one, indented at the same level
+as the list items, and able to contain nested sublists and other
+features. For example, here's a code paragraph:
+
+\c spingle:~$ whoami
+\c spoggler
+
+And here's a sublist. Numbered, just for variety.
+
+\n One.
+
+\lcont{
+
+\n 1a.
+
+\n 1b.
+
+\n 1c.
+
+\lcont{This is an even sillier one: a continuation of a list item in
+a continuation of a list item in a continuation of a list item!}
+
+}
+
+\n Two.
+
+\n Threeeee!
+
+}
+
\b Eek.
This is a horizontal rule:
@@ -70,6 +111,31 @@ This is a numbered list:
\n Eek. \q{Aah} is point \k{keyword}.
+This is a description list:
+
+\dt FISH
+
+\dd A piscine creature, often to be found swimming aimlessly around
+in the sea eating things and not contributing to the global economy.
+
+\lcont{
+
+Here's another of those funky list continuation things, just to keep
+Halibut on its toes.
+
+}
+
+\dt BADGER
+
+\dd A non-piscine creature, often to be found snuffling around on
+land, not contributing to the global economy, and not even swimming
+to make up for it. I don't know. These mammals. Pa-thetic.
+
+\dt "SAUSAGE SALESMAN"
+
+\dd An exemplary contributor to the global economy. Unless he's CMOT
+Dibbler.
+
A-paragraph-full-of-hyphens-to-test-the-idea-that-word-wrapping-can-happen-somewhere-in-all-this-hyphenatory-nonsense.
A\-paragraph\-full\-of\-nonbreaking\-hyphens\-to\-test\-the\-idea\-that\-word\-wrapping\-misses\-them.
diff --git a/main.c b/main.c
index 64f1869..531d122 100644
--- a/main.c
+++ b/main.c
@@ -217,6 +217,7 @@ int main(int argc, char **argv) {
text_backend(sourceform, keywords, idx);
xhtml_backend(sourceform, keywords, idx);
whlp_backend(sourceform, keywords, idx);
+ man_backend(sourceform, keywords, idx);
free_para_list(sourceform);
free_keywords(keywords);
diff --git a/misc.c b/misc.c
index c4ac72f..0de6c2a 100644
--- a/misc.c
+++ b/misc.c
@@ -41,6 +41,13 @@ void *stk_pop(stack s) {
return NULL;
}
+void *stk_top(stack s) {
+    /*
+     * Peek at the item at index sp-1 without popping it; NULL if empty.
+     */
+    return s->sp > 0 ? s->data[s->sp-1] : NULL;
+}
+
/*
* Small routines to amalgamate a string from an input source.
*/
--
2.11.0