From 78c730856d459899f1ea8134c9f5415f586503ba Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 12 Jun 2004 20:09:40 +0000 Subject: [PATCH] Initial checkin of the shiny new rewritten-from-scratch HTML back end. There's a lot more _potentiality_ for new features than there are actual new features just yet, but future highlights include: configurable flavour of HTML (3.2, 4, XHTML Transitional or Strict), proper character set support (this is half way there already), and more flexible allocation of sections between multiple HTML files. Meanwhile, immediate benefits include correct handling of special characters within `author' and `description' strings, omission of the filename part in hyperlinks within the same HTML file (in particular, this means a single output file is now totally independent of its filename), and hyperlinks to the index from the top-level contents page (I'm amazed nobody has complained at the lack of this yet!). There are no doubt some shiny new bugs as well, but I'll never find them unless people start using the thing... git-svn-id: svn://svn.tartarus.org/sgt/halibut@4275 cda61777-01e9-0310-a592-d414129be87e --- LICENCE | 2 +- Makefile | 2 +- bk_html.c | 1881 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ bk_xhtml.c | 1779 ---------------------------------------------------- doc/licence.but | 4 +- doc/output.but | 4 +- halibut.h | 9 +- main.c | 4 +- ustring.c | 15 +- 9 files changed, 1908 insertions(+), 1792 deletions(-) create mode 100644 bk_html.c delete mode 100644 bk_xhtml.c diff --git a/LICENCE b/LICENCE index f5f065b..5df5d76 100644 --- a/LICENCE +++ b/LICENCE @@ -1,4 +1,4 @@ -Halibut is copyright (c) 1999-2004 Simon Tatham and James Aylett. +Halibut is copyright (c) 1999-2004 Simon Tatham. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files diff --git a/Makefile b/Makefile index fd6f932..8ca052a 100644 --- a/Makefile +++ b/Makefile @@ -115,7 +115,7 @@ include $(LIBCHARSET_SRCDIR)Makefile MODULES := main malloc ustring error help licence version misc tree234 MODULES += input keywords contents index style biblio -MODULES += bk_text bk_xhtml bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf +MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf MODULES += winhelp psdata wcwidth OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS) diff --git a/bk_html.c b/bk_html.c new file mode 100644 index 0000000..936126d --- /dev/null +++ b/bk_html.c @@ -0,0 +1,1881 @@ +/* + * HTML backend for Halibut + */ + +/* + * TODO: + * + * - I'm never entirely convinced that having a fragment link to + * come in at the start of the real text in the file is + * sensible. Perhaps for the topmost section in the file, no + * fragment should be used? (Though it should probably still be + * _there_ even if unused.) + * + * - new configurability: + * * a few new things explicitly labelled as `FIXME: + * configurable' or similar. + * * HTML flavour. + * * Some means of specifying the distinction between + * restrict-charset and output-charset. It seems to me that + * `html-charset' is output-charset, and that + * restrict-charset usually wants to be either output-charset + * or UTF-8 (the latter indicating that any Unicode character + * is fair game and it will be specified using &#foo; if it + * isn't in output-charset). However, since XHTML defaults to + * UTF-8 and it's fiddly to tell it otherwise, it's just + * possible that some user may need to set restrict-charset + * to their charset of choice while leaving _output_-charset + * at UTF-8. Figure out some configuration, and apply it. + * + * - test all HTML flavours and ensure they validate sensibly. 
Fix + * remaining confusion issues such as <?xml?> and obsoleteness + * of <TT>. + * + * - proper naming of all fragment IDs. The ones for sections are + * fine; the ones for numbered list and bibliociteds are utter + * crap; the ones for indexes _might_ do but it might be worth + * giving some thought to how to do them better. + * + also set up a mechanism for ensuring that fragment IDs + * never clash. + * + * - nonbreaking spaces? + */ + +#include +#include +#include +#include +#include "halibut.h" + +#define is_heading_type(type) ( (type) == para_Title || \ + (type) == para_Chapter || \ + (type) == para_Appendix || \ + (type) == para_UnnumberedChapter || \ + (type) == para_Heading || \ + (type) == para_Subsect) + +#define heading_depth(p) ( (p)->type == para_Subsect ? (p)->aux + 1 : \ + (p)->type == para_Heading ? 1 : \ + (p)->type == para_Title ? -1 : 0 ) + +typedef struct { + int just_numbers; + wchar_t *number_suffix; +} sectlevel; + +typedef struct { + int nasect; + sectlevel achapter, *asect; + int *contents_depths; /* 0=main, 1=chapter, 2=sect etc */ + int ncdepths; + int address_section, visible_version_id; + int leaf_contains_contents, leaf_smallest_contents; + char *contents_filename; + char *index_filename; + char *template_filename; + char *single_filename; + char *template_fragment; + char *head_end, *body_start, *body_end, *addr_start, *addr_end; + char *body_tag, *nav_attr; + wchar_t *author, *description; + int restrict_charset, output_charset; + enum { + HTML_3_2, HTML_4, + XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT + } htmlver; + wchar_t *lquote, *rquote; + int leaf_level; +} htmlconfig; + +#define contents_depth(conf, level) \ + ( (conf).ncdepths > (level) ? 
(conf).contents_depths[level] : (level)+2 ) + +#define is_xhtml(ver) ((ver) >= XHTML_1_0_TRANSITIONAL) + +typedef struct htmlfile htmlfile; +typedef struct htmlsect htmlsect; + +struct htmlfile { + htmlfile *next; + char *filename; + int last_fragment_number; + int min_heading_depth; + htmlsect *first, *last; /* first/last highest-level sections */ +}; + +struct htmlsect { + htmlsect *next, *parent; + htmlfile *file; + paragraph *title, *text; + enum { NORMAL, TOP, INDEX } type; + int contents_depth; + char *fragment; +}; + +typedef struct { + htmlfile *head, *tail; + htmlfile *single, *index; +} htmlfilelist; + +typedef struct { + htmlsect *head, *tail; +} htmlsectlist; + +typedef struct { + int nrefs, refsize; + word **refs; +} htmlindex; + +typedef struct { + htmlsect *section; + char *fragment; +} htmlindexref; + +typedef struct { + /* + * This level deals with charset conversion, starting and + * ending tags, and writing to the file. It's the lexical + * level. + */ + FILE *fp; + int charset; + charset_state cstate; + int ver; + enum { + HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT + } state; + /* + * Stuff beyond here deals with the higher syntactic level: it + * tracks how many levels of \n"); - } - } - return count; -} - -/* As above, but doesn't do anything in the current file */ -static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file) -{ - int level, limit, start_level, count = 0; - if (!file) - return 0; - - level = (file->sections)?(file->sections->level):(0); - limit = conf.contents_depth[(level>5)?(5):(level)]; - start_level = (file->is_leaf) ? (level-1) : (level); - last_level = start_level; - - count = xhtml_do_contents_limit(fp, file->child, limit); - if (fp!=NULL) { - while (last_level > start_level) { - last_level--; - fprintf(fp, "\n"); - } - } - return count; -} - -/* - * Write contents for the given file, children, and siblings, down to - * given limit contents depth. 
- */ -static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit) -{ - int count = 0; - while (file) { - count += xhtml_do_contents_section_limit(fp, file->sections, limit); - count += xhtml_do_contents_limit(fp, file->child, limit); - file = file->next; - } - return count; -} - -/* - * Write contents entries for the given section tree, down to the - * limit contents depth. - */ -static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit) -{ - int count = 0; - while (section) { - if (!xhtml_add_contents_entry(fp, section, limit)) - return 0; - else - count++; - count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); - section = section->next; - } - return count; -} - -/* - * Write contents entries for the given section tree, down to the - * limit contents depth. - */ -static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit) -{ - int count = 0; - if (!section) - return 0; - xhtml_add_contents_entry(fp, section, limit); - count=1; - count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); - /* section=section->child; - while (section && xhtml_add_contents_entry(fp, section, limit)) { - section = section->next; - }*/ - return count; -} - -/* - * Add a section entry, unless we're exceeding the limit, in which - * case return FALSE (otherwise return TRUE). - */ -static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit) -{ - if (!section || section->level > limit) - return FALSE; - if (fp==NULL || section->level < 0) - return TRUE; - if (last_level > section->level) { - while (last_level > section->level) { - last_level--; - fprintf(fp, "\n"); - } - fprintf(fp, "\n"); - } else if (last_level < section->level) { - assert(last_level == section->level - 1); - last_level++; - fprintf(fp, "