X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/12f0ee84ca3e9810b49601347e9ecc0a0d97e948..e32df3e7953d1519f52be001d197fe3d69aac5eb:/bk_html.c diff --git a/bk_html.c b/bk_html.c index 4da897f..0c66eb7 100644 --- a/bk_html.c +++ b/bk_html.c @@ -10,6 +10,13 @@ * sensible. Perhaps for the topmost section in the file, no * fragment should be used? (Though it should probably still be * _there_ even if unused.) + * + * - In HHK index mode: subsidiary hhk entries (as in replacing + * `foo, bar' with `foo\n\tbar') can be done by embedding + * sub-

@@ -41,17 +48,20 @@ typedef struct { int ncdepths; int address_section, visible_version_id; int leaf_contains_contents, leaf_smallest_contents; + int navlinks; + int rellinks; char *contents_filename; char *index_filename; char *template_filename; char *single_filename; + char *chm_filename, *hhp_filename, *hhc_filename, *hhk_filename; char **template_fragments; int ntfragments; char *head_end, *body_start, *body_end, *addr_start, *addr_end; char *body_tag, *nav_attr; wchar_t *author, *description; wchar_t *index_text, *contents_text, *preamble_text, *title_separator; - wchar_t *nav_prev_text, *nav_next_text, *nav_separator; + wchar_t *nav_prev_text, *nav_next_text, *nav_up_text, *nav_separator; wchar_t *index_main_sep, *index_multi_sep; wchar_t *pre_versionid, *post_versionid; int restrict_charset, output_charset; @@ -77,6 +87,13 @@ struct htmlfile { int last_fragment_number; int min_heading_depth; htmlsect *first, *last; /* first/last highest-level sections */ + /* + * The `temp' field is available for use in individual passes + * over the file list. For example, the HHK index generation + * uses it to ensure no index term references the same file + * more than once. + */ + int temp; }; struct htmlsect { @@ -92,6 +109,7 @@ typedef struct { htmlfile *head, *tail; htmlfile *single, *index; tree234 *frags; + tree234 *files; } htmlfilelist; typedef struct { @@ -127,6 +145,8 @@ typedef struct { enum { HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT } state; + int hackflags; /* used for icky .HH* stuff */ + int hacklimit; /* text size limit, again for .HH* */ /* * Stuff beyond here deals with the higher syntactic level: it * tracks how many levels of

. + */ + if (conf.hhp_filename) { + htmlfile *f; + htmloutput ho; + + ho.charset = CS_CP1252; /* as far as I know, HHP files are */ + ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTENOTHING; + + ho.fp = fopen(conf.hhp_filename, "w"); + if (!ho.fp) + error(err_cantopenw, conf.hhp_filename); + + fprintf(ho.fp, + "[OPTIONS]\n" + /* Binary TOC required for Next/Previous nav to work */ + "Binary TOC=Yes\n" + "Compatibility=1.1 or later\n" + "Compiled file=%s\n" + "Default Window=main\n" + "Default topic=%s\n" + "Display compile progress=Yes\n" + "Full-text search=Yes\n" + "Title=", conf.chm_filename, files.head->filename); + + ho.hacklimit = 255; + html_words(&ho, topsect->title->words, NOTHING, + NULL, keywords, &conf); + + fprintf(ho.fp, "\n"); + + /* + * These two entries don't seem to be remotely necessary + * for a successful run of the help _compiler_, but + * omitting them causes the GUI Help Workshop to behave + * rather strangely if you try to load the help project + * into that and edit it. + */ + if (conf.hhc_filename) + fprintf(ho.fp, "Contents file=%s\n", conf.hhc_filename); + if (hhk_filename) + fprintf(ho.fp, "Index file=%s\n", hhk_filename); + + fprintf(ho.fp, "\n[WINDOWS]\nmain=\""); + + ho.hackflags |= HO_HACK_OMITQUOTES; + ho.hacklimit = 255; + html_words(&ho, topsect->title->words, NOTHING, + NULL, keywords, &conf); + + fprintf(ho.fp, "\",\"%s\",\"%s\",\"%s\",,,,,," + /* This first magic number is fsWinProperties, controlling + * Navigation Pane options and the like. + * Constants HHWIN_PROP_* in htmlhelp.h. */ + "0x62520,," + /* This second number is fsToolBarFlags, mainly controlling + * toolbar buttons. Constants HHWIN_BUTTON_*. + * NOTE: there are two pairs of bits for Next/Previous + * buttons: 7/8 (which do nothing useful), and 21/22 + * (which work). (Neither of these are exposed in the HHW + * UI, but they work fine in HH.) We use the latter. */ + "0x60304e,,,,,,,,0\n", + conf.hhc_filename ? conf.hhc_filename : "", + hhk_filename ? hhk_filename : "", + files.head->filename); + + /* + * The [FILES] section is also not necessary for + * compilation (hhc appears to build up a list of needed + * files just by following links from the given starting + * points), but useful for loading the project into HHW. + */ + fprintf(ho.fp, "\n[FILES]\n"); + for (f = files.head; f; f = f->next) + fprintf(ho.fp, "%s\n", f->filename); + + fclose(ho.fp); + } + if (conf.hhc_filename) { + htmlfile *f; + htmlsect *s, *a; + htmloutput ho; + int currdepth = 0; + + ho.fp = fopen(conf.hhc_filename, "w"); + if (!ho.fp) + error(err_cantopenw, conf.hhc_filename); + + ho.charset = CS_CP1252; /* as far as I know, HHC files are */ + ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTEQUOTES; + + /* + * Magic DOCTYPE which seems to work for .HHC files. I'm + * wary of trying to change it! + */ + fprintf(ho.fp, "\n" + "\n" + "\n" + "

\n"); + currdepth++; + } + while (currdepth > depth) { + fprintf(ho.fp, "

\n", + f->filename, leaf ? 11 : 1); + } + + while (currdepth > 0) { + fprintf(ho.fp, "

\n"); + currdepth--; + } + + fprintf(ho.fp, "

\n"); + + cleanup(&ho); + } + if (hhk_filename) { + htmlfile *f; + htmloutput ho; + indexentry *entry; + int i; + + /* + * First make a pass over all HTML files and set their + * `temp' fields to zero, because we're about to use them. + */ + for (f = files.head; f; f = f->next) + f->temp = 0; + + ho.fp = fopen(hhk_filename, "w"); + if (!ho.fp) + error(err_cantopenw, hhk_filename); + + ho.charset = CS_CP1252; /* as far as I know, HHK files are */ + ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTEQUOTES; + + /* + * Magic DOCTYPE which seems to work for .HHK files. I'm + * wary of trying to change it! + */ + fprintf(ho.fp, "\n" + "\n" + "\n" + "

\n"); + + /* + * Now go through those files and re-clear the temp + * fields ready for the _next_ index term. + */ + for (j = 0; j < hi->nrefs; j++) { + htmlindexref *hr = + (htmlindexref *)hi->refs[j]->private_data; + hr->section->file->temp = 0; + } + } + } + + fprintf(ho.fp, "

\n"); + cleanup(&ho); + } + + /* * Go through and check that no index fragments were referenced * without being generated, or indeed vice versa. * @@ -1477,6 +1955,11 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } freetree234(files.frags); } + /* + * The strings in files.files are all owned by their containing + * htmlfile structures, so there's no need to free them here. + */ + freetree234(files.files); { htmlsect *sect, *tmp; sect = sects.head; @@ -1570,8 +2053,12 @@ static void html_file_section(htmlconfig *cfg, htmlfilelist *files, * we invent a fresh file and put this section at its head. * Otherwise, we put it in the same file as its parent * section. + * + * Another special value of cfg->leaf_level is -1, which + * means infinity (i.e. it's considered to always be + * greater than depth). */ - if (ldepth > cfg->leaf_level) { + if (cfg->leaf_level > 0 && ldepth > cfg->leaf_level) { /* * We know that sect->parent cannot be NULL. The only * circumstance in which it can be is if sect is at @@ -1627,7 +2114,8 @@ static htmlfile *html_new_file(htmlfilelist *list, char *filename) list->head = ret; list->tail = ret; - ret->filename = dupstr(filename); + ret->filename = html_sanitise_filename(list, dupstr(filename)); + add234(list->files, ret->filename); ret->last_fragment_number = 0; ret->min_heading_depth = INT_MAX; ret->first = ret->last = NULL; @@ -1811,16 +2299,18 @@ static void html_charset_cleanup(htmloutput *ho) bytes = charset_from_unicode(NULL, NULL, outbuf, lenof(outbuf), ho->charset, &ho->cstate, NULL); - if (bytes > 0) + if (ho->fp && bytes > 0) fwrite(outbuf, 1, bytes, ho->fp); } static void return_mostly_to_neutral(htmloutput *ho) { - if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { - fprintf(ho->fp, " />"); - } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) { - fprintf(ho->fp, ">"); + if (ho->fp) { + if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { + fprintf(ho->fp, " />"); + } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) { + fprintf(ho->fp, ">"); + } } ho->state = HO_NEUTRAL; @@ -1838,58 +2328,68 @@ static void return_to_neutral(htmloutput *ho) static void element_open(htmloutput *ho, char const *name) { return_to_neutral(ho); - fprintf(ho->fp, "<%s", name); + if (ho->fp) + fprintf(ho->fp, "<%s", name); ho->state = HO_IN_TAG; } static void element_close(htmloutput *ho, char const *name) { return_to_neutral(ho); - fprintf(ho->fp, "", name); + if (ho->fp) + fprintf(ho->fp, "", name); ho->state = HO_NEUTRAL; } static void element_empty(htmloutput *ho, char const *name) { return_to_neutral(ho); - fprintf(ho->fp, "<%s", name); + if (ho->fp) + fprintf(ho->fp, "<%s", name); ho->state = HO_IN_EMPTY_TAG; } static void html_nl(htmloutput *ho) { return_to_neutral(ho); - fputc('\n', ho->fp); + if (ho->fp) + fputc('\n', ho->fp); } static void html_raw(htmloutput *ho, char *text) { return_to_neutral(ho); - fputs(text, ho->fp); + if (ho->fp) + fputs(text, ho->fp); } static void html_raw_as_attr(htmloutput *ho, char *text) { assert(ho->state == HO_IN_TAG || ho->state == HO_IN_EMPTY_TAG); - fputc(' ', ho->fp); - fputs(text, ho->fp); + if (ho->fp) { + fputc(' ', ho->fp); + fputs(text, ho->fp); + } } static void element_attr(htmloutput *ho, char const *name, char const *value) { html_charset_cleanup(ho); assert(ho->state == HO_IN_TAG || ho->state == HO_IN_EMPTY_TAG); - fprintf(ho->fp, " %s=\"%s\"", name, value); + if (ho->fp) + fprintf(ho->fp, " %s=\"%s\"", name, value); } static void element_attr_w(htmloutput *ho, char const *name, wchar_t const *value) { html_charset_cleanup(ho); - fprintf(ho->fp, " %s=\"", name); + if (ho->fp) + fprintf(ho->fp, " %s=\"", name); html_text_limit_internal(ho, value, 0, TRUE, FALSE); html_charset_cleanup(ho); - fputc('"', ho->fp); + if (ho->fp) + fputc('"', ho->fp); } static void html_text(htmloutput *ho, wchar_t const *text) @@ -1917,8 +2417,16 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, char outbuf[256]; int bytes, err; + if (ho->hackflags & (HO_HACK_QUOTEQUOTES | HO_HACK_OMITQUOTES)) + quote_quotes = TRUE; /* override the input value */ + if (maxlen > 0 && textlen > maxlen) textlen = maxlen; + if (ho->hacklimit >= 0) { + if (textlen > ho->hacklimit) + textlen = ho->hacklimit; + ho->hacklimit -= textlen; + } while (textlen > 0) { /* Scan ahead for characters we really can't display in HTML. */ @@ -1934,7 +2442,7 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf), ho->charset, &ho->cstate, &err); textlen -= (lenbefore - lenafter); - if (bytes > 0) + if (bytes > 0 && ho->fp) fwrite(outbuf, 1, bytes, ho->fp); if (err) { /* @@ -1943,26 +2451,35 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, * we use an HTML numeric entity reference. */ assert(textlen > 0); - fprintf(ho->fp, "&#%ld;", (long int)*text); + if (ho->fp) + fprintf(ho->fp, "&#%ld;", (long int)*text); text++, textlen--; } else if (lenafter == 0 && textlen > 0) { /* * We have encountered a character which is special to * HTML. */ - if (*text == L'<') - fprintf(ho->fp, "<"); - else if (*text == L'>') - fprintf(ho->fp, ">"); - else if (*text == L'&') - fprintf(ho->fp, "&"); - else if (*text == L'"') - fprintf(ho->fp, """); - else if (*text == L' ') { - assert(nbsp); - fprintf(ho->fp, " "); - } else - assert(!"Can't happen"); + if (ho->fp) { + if (*text == L'"' && (ho->hackflags & HO_HACK_OMITQUOTES)) { + fputc('\'', ho->fp); + } else if (ho->hackflags & HO_HACK_QUOTENOTHING) { + fputc(*text, ho->fp); + } else { + if (*text == L'<') + fprintf(ho->fp, "<"); + else if (*text == L'>') + fprintf(ho->fp, ">"); + else if (*text == L'&') + fprintf(ho->fp, "&"); + else if (*text == L'"') + fprintf(ho->fp, """); + else if (*text == L' ') { + assert(nbsp); + fprintf(ho->fp, " "); + } else + assert(!"Can't happen"); + } + } text++, textlen--; } } @@ -1971,7 +2488,8 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, static void cleanup(htmloutput *ho) { return_to_neutral(ho); - fclose(ho->fp); + if (ho->fp) + fclose(ho->fp); } static void html_href(htmloutput *ho, htmlfile *thisfile, @@ -2101,7 +2619,7 @@ static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file, /* If there's nothing left, make something valid up */ if (!*text) { - static const char *anonfrag = "anon"; + static const char anonfrag[] = "anon"; text = sresize(text, lenof(anonfrag), char); strcpy(text, anonfrag); } @@ -2132,6 +2650,69 @@ static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file, return text; } +static char *html_sanitise_filename(htmlfilelist *files, char *text) +{ + /* + * Unceremoniously rip out any character that might cause + * difficulty in some filesystem or another, or be otherwise + * inconvenient. + * + * That doesn't leave much punctuation. I permit alphanumerics + * and +-.=_ only. + */ + char *p = text, *q = text; + + while (*p) { + if ((*p>='A' && *p<='Z') || + (*p>='a' && *p<='z') || + (*p>='0' && *p<='9') || + *p=='-' || *p=='_' || *p=='+' || *p=='.' || *p=='=') + *q++ = *p; + p++; + } + *q = '\0'; + + /* If there's nothing left, make something valid up */ + if (!*text) { + static const char anonfrag[] = "anon.html"; + text = sresize(text, lenof(anonfrag), char); + strcpy(text, anonfrag); + } + + /* + * Now we check for clashes with other filenames, and adjust + * this one if necessary by appending a hyphen followed by a + * number just before the file extension (if any). + */ + { + int len, extpos; + int suffix = 1; + + p = NULL; + + while (find234(files->files, text, NULL)) { + if (!p) { + len = strlen(text); + p = text; + text = snewn(len+20, char); + + for (extpos = len; extpos > 0 && p[extpos-1] != '.'; extpos--); + if (extpos > 0) + extpos--; + else + extpos = len; + } + + sprintf(text, "%.*s-%d%s", extpos, p, ++suffix, p+extpos); + } + + if (p) + sfree(p); + } + + return text; +} + static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s, htmlfile *thisfile, keywordlist *keywords, htmlconfig *cfg)