From 7136a6c7f094fa423c48ec319748c4fd7e1fa645 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 23 Mar 2004 20:10:23 +0000 Subject: [PATCH] Man-page back end for Halibut. Also, a couple of additional markup features commonly used in man pages: (a) the ability to nest paragraph breaks, code paragraphs and other lists inside list items, and (b) description lists as normally used in man pages to describe command-line options. git-svn-id: svn://svn.tartarus.org/sgt/halibut@3954 cda61777-01e9-0310-a592-d414129be87e --- Makefile | 2 +- bk_man.c | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ bk_text.c | 30 +++++- bk_whlp.c | 17 ++- bk_xhtml.c | 71 +++++++++--- error.c | 15 +++ halibut.h | 13 +++ input.c | 136 +++++++++++++++++++++-- inputs/test.but | 68 +++++++++++- main.c | 1 + misc.c | 7 ++ 11 files changed, 659 insertions(+), 31 deletions(-) create mode 100644 bk_man.c diff --git a/Makefile b/Makefile index 35d2eef..bfd14a5 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ SRC := ../ MODULES := main malloc ustring error help licence version misc tree234 MODULES += input keywords contents index style biblio -MODULES += bk_text bk_xhtml bk_whlp +MODULES += bk_text bk_xhtml bk_whlp bk_man MODULES += winhelp OBJECTS := $(addsuffix .o,$(MODULES)) diff --git a/bk_man.c b/bk_man.c new file mode 100644 index 0000000..02d942a --- /dev/null +++ b/bk_man.c @@ -0,0 +1,330 @@ +/* + * man page backend for Halibut + */ + +#include +#include +#include +#include "halibut.h" + +static void man_text(FILE *, word *, int newline, int quote_props); +static void man_codepara(FILE *, word *); + +#define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */ +#define QUOTE_QUOTES 2 /* quote double quotes by doubling them */ + +void man_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx) { + paragraph *p; + FILE *fp; + char const *sep; + + IGNORE(keywords); /* we don't happen to need this */ + IGNORE(idx); /* or this */ + + /* + * Determine the output file name, and open the output file + * + * FIXME: want configurable output file names here. For the + * moment, we'll just call it `output.1'. + */ + fp = fopen("output.1", "w"); + if (!fp) { + error(err_cantopenw, "output.1"); + return; + } + + /* Do the version ID */ + for (p = sourceform; p; p = p->next) + if (p->type == para_VersionID) { + fprintf(fp, ".\\\" "); + man_text(fp, p->words, TRUE, 0); + } + + /* FIXME: .TH name-of-program manual-section */ + fprintf(fp, ".TH FIXME 1\n"); + + fprintf(fp, ".UC\n"); + + /* Do the preamble and copyright */ + sep = ""; + for (p = sourceform; p; p = p->next) + if (p->type == para_Preamble) { + fprintf(fp, "%s", sep); + man_text(fp, p->words, TRUE, 0); + sep = "\n"; + } + for (p = sourceform; p; p = p->next) + if (p->type == para_Copyright) { + fprintf(fp, "%s", sep); + man_text(fp, p->words, TRUE, 0); + sep = "\n"; + } + + /* + * FIXME: + * + * - figure out precisely what needs to be escaped. + * * A dot or apostrophe at the start of a line wants to be + * preceded by `\&', which is a zero-width space. + * * Literal backslashes always want doubling. + * * Within double quotes, a double quote needs doubling + * too. + * + * - work out what to do about hyphens / minuses... + */ + for (p = sourceform; p; p = p->next) switch (p->type) { + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. + */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Headings. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + case para_Heading: + case para_Subsect: + fprintf(fp, ".SH \""); + /* FIXME: disable this, at _least_ by default */ + if (p->kwtext) + man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES); + fprintf(fp, " "); + man_text(fp, p->words, FALSE, QUOTE_QUOTES); + fprintf(fp, "\"\n"); + break; + + /* + * Code paragraphs. + */ + case para_Code: + fprintf(fp, ".PP\n"); + man_codepara(fp, p->words); + break; + + /* + * Normal paragraphs. + */ + case para_Normal: + fprintf(fp, ".PP\n"); + man_text(fp, p->words, TRUE, 0); + break; + + /* + * List paragraphs. + */ + case para_Description: + case para_BiblioCited: + case para_Bullet: + case para_NumberedList: + if (p->type == para_Bullet) { + fprintf(fp, ".IP \"\\fBo\\fP\"\n"); /* FIXME: configurable? */ + } else if (p->type == para_NumberedList) { + fprintf(fp, ".IP \""); + man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES); + fprintf(fp, "\"\n"); + } else if (p->type == para_Description) { + /* + * Do nothing; the .xP for this paragraph is the .IP + * which has come before it in the DescribedThing. + */ + } else if (p->type == para_BiblioCited) { + fprintf(fp, ".IP \""); + man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES); + fprintf(fp, "\"\n"); + } + man_text(fp, p->words, TRUE, 0); + break; + + case para_DescribedThing: + fprintf(fp, ".IP \""); + man_text(fp, p->words, FALSE, QUOTE_QUOTES); + fprintf(fp, "\"\n"); + break; + + case para_Rule: + /* + * FIXME. + */ + break; + + case para_LcontPush: + fprintf(fp, ".RS\n"); + break; + case para_LcontPop: + fprintf(fp, ".RE\n"); + break; + } + + /* + * Tidy up. + */ + fclose(fp); +} + +/* + * Convert a wide string into a string of chars. If `result' is + * non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible for the output character + * set. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + * + * FIXME: Here is probably also a good place to do escaping sorts + * of things. I know I at least need to escape backslash, and full + * stops at the starts of words are probably trouble as well. + */ +static int man_convert(wchar_t *s, char **result, int quote_props) { + /* + * FIXME. Currently this is ISO8859-1 only. + */ + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) { + wchar_t c = *s; + char outc; + + if ((c >= 32 && c <= 126) || + (c >= 160 && c <= 255)) { + /* Char is OK. */ + outc = (char)c; + } else { + /* Char is not OK. */ + ok = FALSE; + outc = 0xBF; /* approximate the good old DEC `uh?' */ + } + if (doing) { + if (plen+3 >= psize) { + psize = plen + 256; + p = resize(p, psize); + } + if (plen == 0 && (outc == '.' || outc == '\'') && + (quote_props & QUOTE_INITCTRL)) { + /* + * Control character (. or ') at the start of a + * line. Quote it by putting \& (troff zero-width + * space) before it. + */ + p[plen++] = '\\'; + p[plen++] = '&'; + } else if (outc == '\\') { + /* + * Quote backslashes by doubling them, always. + */ + p[plen++] = '\\'; + } else if (outc == '"' && (quote_props & QUOTE_QUOTES)) { + /* + * Double quote within double quotes. Quote it by + * doubling. + */ + p[plen++] = '"'; + } + p[plen++] = outc; + } + } + if (doing) { + p = resize(p, plen+1); + p[plen] = '\0'; + *result = p; + } + return ok; +} + +static void man_rdaddwc(rdstringc *rs, word *text, word *end, + int quote_props) { + char *c; + + for (; text && text != end; text = text->next) switch (text->type) { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, "\\fI"); + else if (towordstyle(text->type) == word_Code && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, "\\fB"); + if (removeattr(text->type) == word_Normal) { + if (rs->pos > 0) + quote_props &= ~QUOTE_INITCTRL; /* not at start any more */ + if (man_convert(text->text, &c, quote_props)) + rdaddsc(rs, c); + else + man_rdaddwc(rs, text->alt, NULL, quote_props); + sfree(c); + } else if (removeattr(text->type) == word_WhiteSpace) { + rdaddc(rs, ' '); + } else if (removeattr(text->type) == word_Quote) { + rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\''); + /* FIXME: configurability */ + } + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, "\\fP"); + else if (towordstyle(text->type) == word_Code && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, "\\fP"); + break; + } +} + +static void man_text(FILE *fp, word *text, int newline, int quote_props) { + rdstringc t = { 0, 0, NULL }; + + man_rdaddwc(&t, text, NULL, quote_props | QUOTE_INITCTRL); + fprintf(fp, "%s", t.text); + sfree(t.text); + if (newline) + fputc('\n', fp); +} + +static void man_codepara(FILE *fp, word *text) { + fprintf(fp, ".nf\n"); + for (; text; text = text->next) if (text->type == word_WeakCode) { + char *c; + man_convert(text->text, &c, QUOTE_INITCTRL); + fprintf(fp, "%s\n", c); + sfree(c); + } + fprintf(fp, ".fi\n"); +} diff --git a/bk_text.c b/bk_text.c index 48e99f4..989cdbf 100644 --- a/bk_text.c +++ b/bk_text.c @@ -182,6 +182,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, word spaceword; FILE *fp; char *prefixextra; + int nesting, nestindent; int indentb, indenta; IGNORE(keywords); /* we don't happen to need this */ @@ -219,9 +220,20 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, conf.indent_preambles ? conf.indent : 0, 0, conf.width + (conf.indent_preambles ? 0 : conf.indent)); + nestindent = conf.listindentbefore + conf.listindentafter; + nesting = 0; + /* Do the main document */ for (p = sourceform; p; p = p->next) switch (p->type) { + case para_LcontPush: + nesting++; + break; + case para_LcontPop: + assert(nesting > 0); + nesting--; + break; + /* * Things we ignore because we've already processed them or * aren't going to touch them in this pass. @@ -254,10 +266,13 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, break; case para_Rule: - text_rule(fp, conf.indent, conf.width); + text_rule(fp, conf.indent + nestindent*nesting, + conf.width - nestindent*nesting); break; case para_Normal: + case para_DescribedThing: + case para_Description: case para_BiblioCited: case para_Bullet: case para_NumberedList: @@ -271,6 +286,11 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, prefixextra = "."; /* FIXME: configurability */ indentb = conf.listindentbefore; indenta = conf.listindentafter; + } else if (p->type == para_Description) { + prefix = NULL; + prefixextra = NULL; + indentb = conf.listindentbefore; + indenta = conf.listindentafter; } else { prefix = NULL; prefixextra = NULL; @@ -289,8 +309,8 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, body = p->words; } text_para(fp, prefix, prefixextra, body, - conf.indent + indentb, indenta, - conf.width - indentb - indenta); + conf.indent + nestindent*nesting + indentb, indenta, + conf.width - nestindent*nesting - indentb - indenta); if (wp) { wp->next = NULL; free_word_list(body); @@ -298,7 +318,9 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, break; case para_Code: - text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code); + text_codepara(fp, p->words, + conf.indent + nestindent*nesting + conf.indent_code, + conf.width - nestindent*nesting - 2 * conf.indent_code); break; } diff --git a/bk_whlp.c b/bk_whlp.c index f4b1b2d..a21ac04 100644 --- a/bk_whlp.c +++ b/bk_whlp.c @@ -50,6 +50,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, struct bk_whlp_state state; WHLP_TOPIC contents_topic; int i; + int nesting; indexentry *ie; filename = "output.hlp"; /* FIXME: configurability */ @@ -216,6 +217,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, * Now we've done the contents page, we're ready to go through * and do the main manual text. Ooh. */ + nesting = 0; for (p = sourceform; p; p = p->next) switch (p->type) { /* * Things we ignore because we've already processed them or @@ -231,6 +233,14 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, case para_Title: break; + case para_LcontPush: + nesting++; + break; + case para_LcontPop: + assert(nesting > 0); + nesting--; + break; + /* * Chapter and section titles: start a new Help topic. */ @@ -343,12 +353,14 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, break; case para_Normal: + case para_DescribedThing: + case para_Description: case para_BiblioCited: case para_Bullet: case para_NumberedList: whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); if (p->type == para_Bullet || p->type == para_NumberedList) { - whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72); + whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting + 72); whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36); whlp_set_tabstop(h, 72, WHLP_ALIGN_LEFT); whlp_begin_para(h, WHLP_PARA_SCROLL); @@ -361,6 +373,8 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, } whlp_tab(h); } else { + whlp_para_attr(h, WHLP_PARA_LEFTINDENT, + 72*nesting + (p->type==para_Description ? 72 : 0)); whlp_begin_para(h, WHLP_PARA_SCROLL); } @@ -386,6 +400,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, for (w = p->words; w; w = w->next) { if (!w->next) whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); + whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting); whlp_begin_para(h, WHLP_PARA_SCROLL); whlp_set_font(h, FONT_CODE); whlp_convert(w->text, &c, FALSE); diff --git a/bk_xhtml.c b/bk_xhtml.c index 140e2dd..053bf33 100644 --- a/bk_xhtml.c +++ b/bk_xhtml.c @@ -1076,14 +1076,15 @@ static void xhtml_do_sections(FILE *fp, xhtmlsection *sections) /* Write this list of paragraphs. Close off all lists at the end. */ static void xhtml_do_paras(FILE *fp, paragraph *p) { - int last_type = -1, first=TRUE; + int last_type = -1, ptype, first=TRUE; + stack lcont_stack = stk_new(); if (!p) return; /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/ for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) { first=FALSE; - switch (p->type) + switch (ptype = p->type) { /* * Things we ignore because we've already processed them or @@ -1123,8 +1124,29 @@ static void xhtml_do_paras(FILE *fp, paragraph *p) fprintf(fp, "

\n"); break; + case para_LcontPush: + { + int *p; + p = mknew(int); + *p = last_type; + stk_push(lcont_stack, p); + last_type = para_Normal; + } + break; + case para_LcontPop: + { + int *p = stk_pop(lcont_stack); + assert(p); + ptype = last_type = *p; + sfree(p); + goto closeofflist; /* ick */ + } + break; + case para_Bullet: case para_NumberedList: + case para_Description: + case para_DescribedThing: case para_BiblioCited: if (last_type!=p->type) { /* start up list if necessary */ @@ -1132,41 +1154,62 @@ static void xhtml_do_paras(FILE *fp, paragraph *p) fprintf(fp, "
    \n"); } else if (p->type == para_NumberedList) { fprintf(fp, "
      \n"); - } else if (p->type == para_BiblioCited) { + } else if (p->type == para_BiblioCited || + p->type == para_DescribedThing || + p->type == para_Description) { fprintf(fp, "
      \n"); } } - if (p->type == para_Bullet || p->type == para_NumberedList) + if (p->type == para_Bullet || p->type == para_NumberedList) { fprintf(fp, "
    1. "); - else if (p->type == para_BiblioCited) { + } else if (p->type == para_DescribedThing) { + fprintf(fp, "
      "); + } else if (p->type == para_Description) { + fprintf(fp, "
      "); + } else if (p->type == para_BiblioCited) { fprintf(fp, "
      "); xhtml_para(fp, p->kwtext); fprintf(fp, "
      \n
      "); } xhtml_para(fp, p->words); - if (p->type == para_BiblioCited) { + { + paragraph *p2 = p->next; + if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush) + break; + } + + closeofflist: + if (ptype == para_BiblioCited) { fprintf(fp, "
      \n"); - } else if (p->type == para_Bullet || p->type == para_NumberedList) { + } else if (p->type == para_DescribedThing) { + fprintf(fp, ""); + } else if (p->type == para_Description) { + fprintf(fp, ""); + } else if (ptype == para_Bullet || ptype == para_NumberedList) { fprintf(fp, "
    2. "); } - if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited) + if (ptype == para_Bullet || ptype == para_NumberedList || + ptype == para_BiblioCited || ptype == para_Description || + ptype == para_DescribedThing) /* close off list if necessary */ { paragraph *p2 = p->next; int close_off=FALSE; /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/ if (p2 && xhtml_para_level(p2)==-1) { - if (p2->type != p->type) + if (p2->type != ptype && p2->type != para_LcontPush) close_off=TRUE; } else { close_off=TRUE; } if (close_off) { - if (p->type == para_Bullet) { + if (ptype == para_Bullet) { fprintf(fp, "
\n"); - } else if (p->type == para_NumberedList) { + } else if (ptype == para_NumberedList) { fprintf(fp, "\n"); - } else if (p->type == para_BiblioCited) { + } else if (ptype == para_BiblioCited || + ptype == para_Description || + ptype == para_DescribedThing) { fprintf(fp, "\n"); } } @@ -1177,8 +1220,10 @@ static void xhtml_do_paras(FILE *fp, paragraph *p) xhtml_codepara(fp, p->words); break; } - last_type = p->type; + last_type = ptype; } + + stk_free(lcont_stack); } /* diff --git a/error.c b/error.c index 6d8dd13..e9c681a 100644 --- a/error.c +++ b/error.c @@ -119,6 +119,11 @@ static void do_error(int code, va_list ap) { sprintf(error, "unclosed braces at end of paragraph"); flags = FILEPOS; break; + case err_missingrbrace2: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unclosed braces at end of input file"); + flags = FILEPOS; + break; case err_nestedstyles: fpos = *va_arg(ap, filepos *); sprintf(error, "unable to nest text styles"); @@ -184,6 +189,16 @@ static void do_error(int code, va_list ap) { sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); flags = FILEPOS; break; + case err_misplacedlcont: + fpos = *va_arg(ap, filepos *); + sprintf(error, "\\lcont is only expected after a list item"); + flags = FILEPOS; + break; + case err_sectmarkerinlcont: + fpos = *va_arg(ap, filepos *); + sprintf(error, "section headings are not supported within \\lcont"); + flags = FILEPOS; + break; case err_whatever: sp = va_arg(ap, char *); vsprintf(error, sp, ap); diff --git a/halibut.h b/halibut.h index c948624..6b3d383 100644 --- a/halibut.h +++ b/halibut.h @@ -101,6 +101,8 @@ enum { para_BiblioCited, /* ... into this paragraph type */ para_Bullet, para_NumberedList, + para_DescribedThing, + para_Description, para_Code, para_Copyright, para_Preamble, @@ -108,6 +110,8 @@ enum { para_Title, para_VersionID, para_Config, /* configuration directive */ + para_LcontPush, /* begin continuation of list item */ + para_LcontPop, /* end continuation of list item */ para_NotParaType /* placeholder value */ }; @@ -197,6 +201,7 @@ enum { err_commenteof, /* EOF inside braced comment */ err_kwexprbr, /* expected `}' after cross-ref */ err_missingrbrace, /* unclosed braces at end of para */ + err_missingrbrace2, /* unclosed braces at end of file */ err_nestedstyles, /* unable to nest text styles */ err_nestedindex, /* unable to nest `\i' thingys */ err_nosuchkw, /* unresolved cross-reference */ @@ -207,6 +212,8 @@ enum { err_sectjump, /* jump a heading level, eg \C -> \S */ err_winhelp_ctxclash, /* WinHelp context ID hash clash */ err_multikw, /* keyword clash in sections */ + err_misplacedlcont, /* \lcont not after a list item */ + err_sectmarkerinlcont, /* section marker appeared in \lcont */ err_whatever /* random error of another type */ }; @@ -277,6 +284,7 @@ stack stk_new(void); void stk_free(stack); void stk_push(stack, void *); void *stk_pop(stack); +void *stk_top(stack); typedef struct tagRdstring rdstring; struct tagRdstring { @@ -413,4 +421,9 @@ void xhtml_backend(paragraph *, keywordlist *, indexdata *); */ void whlp_backend(paragraph *, keywordlist *, indexdata *); +/* + * bk_man.c + */ +void man_backend(paragraph *, keywordlist *, indexdata *); + #endif diff --git a/input.c b/input.c index d4ecaeb..6cad0f7 100644 --- a/input.c +++ b/input.c @@ -180,11 +180,14 @@ enum { c_copyright, /* copyright statement */ c_cw, /* weak code */ c_date, /* document processing date */ + c_dd, /* description list: description */ c_define, /* macro definition */ + c_dt, /* description list: described thing */ c_e, /* emphasis */ c_i, /* visible index mark */ c_ii, /* uncapitalised visible index mark */ c_k, /* uncapitalised cross-reference */ + c_lcont, /* continuation para(s) for list item */ c_n, /* numbered list */ c_nocite, /* bibliography trickery */ c_preamble, /* document preamble text */ @@ -245,11 +248,14 @@ static void match_kw(token *tok) { {"copyright", c_copyright}, /* copyright statement */ {"cw", c_cw}, /* weak code */ {"date", c_date}, /* document processing date */ + {"dd", c_dd}, /* description list: description */ {"define", c_define}, /* macro definition */ + {"dt", c_dt}, /* description list: described thing */ {"e", c_e}, /* emphasis */ {"i", c_i}, /* visible index mark */ {"ii", c_ii}, /* uncapitalised visible index mark */ {"k", c_k}, /* uncapitalised cross-reference */ + {"lcont", c_lcont}, /* continuation para(s) for list item */ {"n", c_n}, /* numbered list */ {"nocite", c_nocite}, /* bibliography trickery */ {"preamble", c_preamble}, /* document preamble text */ @@ -506,6 +512,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { int already; int iswhite, seenwhite; int type; + int prev_para_type; struct stack_item { enum { stack_nop = 0, /* do nothing (for error recovery) */ @@ -519,6 +526,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { word **idximplicit; /* to restore from \u alternatives */ } *sitem; stack parsestk; + struct crossparaitem { + int type; /* currently c_lcont or -1 */ + int seen_lcont; + }; + stack crossparastk; word *indexword, *uword, *iword; word *idxwordlist; rdstring indexstr; @@ -530,6 +542,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { macros = newtree234(macrocmp); already = FALSE; + crossparastk = stk_new(); + /* * Loop on each paragraph. */ @@ -542,10 +556,12 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* * Get a token. */ - if (!already) { - dtor(t), t = get_token(in); - } - already = FALSE; + do { + if (!already) { + dtor(t), t = get_token(in); + } + already = FALSE; + } while (t.type == tok_eop); if (t.type == tok_eof) break; @@ -574,18 +590,83 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { break; else if (t.type != tok_cmd || t.cmd != c_c) { error(err_brokencodepara, &t.pos); + prev_para_type = par.type; addpara(par, ret); while (t.type != tok_eop) /* error recovery: */ dtor(t), t = get_token(in); /* eat rest of paragraph */ goto codeparabroken; /* ick, but such is life */ } } + prev_para_type = par.type; addpara(par, ret); codeparabroken: continue; } /* + * Spot the special commands that define a grouping of more + * than one paragraph, and also the closing braces that + * finish them. + */ + if (t.type == tok_cmd && + t.cmd == c_lcont) { + struct crossparaitem *sitem, *stop; + + /* + * Expect, and swallow, an open brace. + */ + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + continue; + } + + /* + * \lcont causes a continuation of a list item into + * multiple paragraphs (which may in turn contain + * nested lists, code paras etc). Hence, the previous + * paragraph must be of a list type. + */ + sitem = mknew(struct crossparaitem); + stop = (struct crossparaitem *)stk_top(crossparastk); + if (prev_para_type == para_Bullet || + prev_para_type == para_NumberedList || + prev_para_type == para_Description) { + sitem->type = c_lcont; + sitem->seen_lcont = 1; + par.type = para_LcontPush; + prev_para_type = par.type; + addpara(par, ret); + } else { + /* + * Push a null item on the cross-para stack so that + * when we see the corresponding closing brace we + * don't give a cascade error. + */ + sitem->type = -1; + sitem->seen_lcont = (stop ? stop->seen_lcont : 0); + error(err_misplacedlcont, &t.pos); + } + stk_push(crossparastk, sitem); + continue; + } else if (t.type == tok_rbrace) { + struct crossparaitem *sitem = stk_pop(crossparastk); + if (!sitem) + error(err_unexbrace, &t.pos); + else { + switch (sitem->type) { + case c_lcont: + par.type = para_LcontPop; + prev_para_type = par.type; + addpara(par, ret); + break; + } + sfree(sitem); + } + continue; + } + + /* * This token begins a paragraph. See if it's one of the * special commands that define a paragraph type. * @@ -638,6 +719,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { case c_U: needkw = 32; par.type = para_UnnumberedChapter; break; /* For \b and \n the keyword is optional */ case c_b: needkw = 4; par.type = para_Bullet; break; + case c_dt: needkw = 4; par.type = para_DescribedThing; break; + case c_dd: needkw = 4; par.type = para_Description; break; case c_n: needkw = 4; par.type = para_NumberedList; break; case c_cfg: needkw = 8; par.type = para_Config; start_cmd = c_cfg; break; @@ -651,6 +734,17 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { case c_versionid: needkw = 32; par.type = para_VersionID; break; } + if (par.type == para_Chapter || + par.type == para_Heading || + par.type == para_Subsect || + par.type == para_Appendix || + par.type == para_UnnumberedChapter) { + struct crossparaitem *sitem = stk_top(crossparastk); + if (sitem && sitem->seen_lcont) { + error(err_sectmarkerinlcont, &t.pos); + } + } + if (needkw > 0) { rdstring rs = { 0, 0, NULL }; int nkeys = 0; @@ -732,6 +826,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } if (t.type == tok_cmd) already = TRUE;/* inhibit get_token at top of loop */ + prev_para_type = par.type; addpara(par, ret); continue; /* next paragraph */ } @@ -837,9 +932,16 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { break; case tok_rbrace: sitem = stk_pop(parsestk); - if (!sitem) - error(err_unexbrace, &t.pos); - else { + if (!sitem) { + /* + * This closing brace could have been an + * indication that the cross-paragraph stack + * wants popping. Accordingly, we treat it here + * as an indication that the paragraph is over. + */ + already = TRUE; + goto finished_para; + } else { if (sitem->type & stack_ualt) { whptr = sitem->whptr; idximplicit = sitem->idximplicit; @@ -1170,16 +1272,26 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { dtor(t), t = get_token(in); seenwhite = iswhite; } + finished_para: /* Check the stack is empty */ - if (NULL != (sitem = stk_pop(parsestk))) { - do { + if (stk_top(parsestk)) { + while ((sitem = stk_pop(parsestk))) sfree(sitem); - sitem = stk_pop(parsestk); - } while (sitem); error(err_missingrbrace, &t.pos); } stk_free(parsestk); + prev_para_type = par.type; addpara(par, ret); + if (t.type == tok_eof) + already = TRUE; + } + + if (stk_top(crossparastk)) { + void *p; + + error(err_missingrbrace2, &t.pos); + while ((p = stk_pop(crossparastk))) + sfree(p); } /* @@ -1188,6 +1300,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { */ dtor(t); macrocleanup(macros); + + stk_free(crossparastk); } paragraph *read_input(input *in, indexdata *idx) { diff --git a/inputs/test.but b/inputs/test.but index 8ac6e86..059567e 100644 --- a/inputs/test.but +++ b/inputs/test.but @@ -19,7 +19,7 @@ a bit] \define{coopt} co\u00F6{-o}pt -\versionid $Id: test.but,v 1.19 2002/08/12 11:24:28 simon Exp $ +\versionid $Id: test.but,v 1.20 2004/03/23 20:10:23 simon Exp $ \C{ch\\ap} First chapter title; for similar wrapping reasons this chapter title will be ludicrously long. I wonder how much more @@ -33,6 +33,14 @@ fragments}. \cw{This} is weak code. And \k{head} contains some other stuff. \K{subhead} does too. +To test the man page back end: + +.Directive + +'Directive + +\\Sping\\Spong\\Spoing + \H{head} First section title (very long again, no prizes for guessing the reason why this time, and here's yet more text to pad it out to three lines of output) @@ -56,6 +64,39 @@ This is a list: \b Aah. +\lcont{ + +This bulletted list contains a list continuation. This is an +additional paragraph, or more than one, indented at the same level +as the list items, and able to contain nested sublists and other +features. For example, here's a code paragraph: + +\c spingle:~$ whoami +\c spoggler + +And here's a sublist. Numbered, just for variety. + +\n One. + +\lcont{ + +\n 1a. + +\n 1b. + +\n 1c. + +\lcont{This is an even sillier one: a continuation of a list item in +a continuation of a list item in a continuation of a list item!} + +} + +\n Two. + +\n Threeeee! + +} + \b Eek. This is a horizontal rule: @@ -70,6 +111,31 @@ This is a numbered list: \n Eek. \q{Aah} is point \k{keyword}. +This is a description list: + +\dt FISH + +\dd A piscine creature, often to be found swimming aimlessly around +in the sea eating things and not contributing to the global economy. + +\lcont{ + +Here's another of those funky list continuation things, just to keep +Halibut on its toes. + +} + +\dt BADGER + +\dd A non-piscine creature, often to be found snuffling around on +land, not contributing to the global economy, and not even swimming +to make up for it. I don't know. These mammals. Pa-thetic. + +\dt "SAUSAGE SALESMAN" + +\dd An exemplary contributor to the global economy. Unless he's CMOT +Dibbler. + A-paragraph-full-of-hyphens-to-test-the-idea-that-word-wrapping-can-happen-somewhere-in-all-this-hyphenatory-nonsense. A\-paragraph\-full\-of\-nonbreaking\-hyphens\-to\-test\-the\-idea\-that\-word\-wrapping\-misses\-them. diff --git a/main.c b/main.c index 64f1869..531d122 100644 --- a/main.c +++ b/main.c @@ -217,6 +217,7 @@ int main(int argc, char **argv) { text_backend(sourceform, keywords, idx); xhtml_backend(sourceform, keywords, idx); whlp_backend(sourceform, keywords, idx); + man_backend(sourceform, keywords, idx); free_para_list(sourceform); free_keywords(keywords); diff --git a/misc.c b/misc.c index c4ac72f..0de6c2a 100644 --- a/misc.c +++ b/misc.c @@ -41,6 +41,13 @@ void *stk_pop(stack s) { return NULL; } +void *stk_top(stack s) { + if (s->sp > 0) + return s->data[s->sp-1]; + else + return NULL; +} + /* * Small routines to amalgamate a string from an input source. */ -- 2.11.0