From da090173a115e3450b23e6b84aa98972c4f1619e Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 13 Jun 2004 14:57:25 +0000 Subject: [PATCH] The Halibut manual contained at least one instance of two index terms (intentionally) differing only in case, which were being silently folded into one by the case-insensitive index tag comparison. Halibut now warns in this situation (but then folds them anyway, which I think is better than silently generating an index containing many case-distinct forms of the same word - I imagine it's very easy to do that by mistake). The manual has been fixed to explicitly define distinct keywords (in the case I spotted and in five other cases picked up by the new warning!), and also documents this issue and how to work with it. git-svn-id: svn://svn.tartarus.org/sgt/halibut@4279 cda61777-01e9-0310-a592-d414129be87e --- doc/index.but | 12 +++++++----- doc/input.but | 53 +++++++++++++++++++++++++++++++++++++++++++---------- doc/output.but | 2 +- error.c | 16 +++++++++++++++- halibut.h | 1 + index.c | 15 +++++++++++++-- misc.c | 2 +- 7 files changed, 81 insertions(+), 20 deletions(-) diff --git a/doc/index.but b/doc/index.but index f57f51b..53f9c9e 100644 --- a/doc/index.but +++ b/doc/index.but @@ -314,7 +314,7 @@ directive \IM{\\W} \c{\\W} command \IM{\\u} \c{\\u} command \IM{\\k} \c{\\k} command -\IM{\\K} \c{\\K} command +\IM{\\K-upper} \c{\\K} command \IM{\\#} \c{\\#} command \IM{\\b} \c{\\b} command \IM{\\n} \c{\\n} command @@ -323,23 +323,23 @@ directive \IM{\\lcont} \c{\\lcont} command \IM{\\rule} \c{\\rule} command \IM{\\quote} \c{\\quote} command -\IM{\\C} \c{\\C} command +\IM{\\C-upper} \c{\\C} command \IM{\\H} \c{\\H} command \IM{\\S} \c{\\S} command \IM{\\A} \c{\\A} command -\IM{\\U} \c{\\U} command +\IM{\\U-upper} \c{\\U} command \IM{\\S2} \c{\\S2}, \c{\\S3} commands etc. \IM{\\title} \c{\\title} command \IM{\\copyright} \c{\\copyright} command \IM{\\versionid} \c{\\versionid} command -\IM{\\B} \c{\\B} command +\IM{\\B-upper} \c{\\B} command \IM{\\BR} \c{\\BR} command \IM{\\nocite} \c{\\nocite} command \IM{\\i} \c{\\i} command \IM{\\i\\c} \c{\\i\\c} combination \IM{\\i\\cw} \c{\\i\\cw} combination \IM{\\i\\e} \c{\\i\\e} combination -\IM{\\I} \c{\\I} command +\IM{\\I-upper} \c{\\I} command \IM{\\ii} \c{\\ii} command \IM{\\IM} \c{\\IM} command \IM{\\cfg} \c{\\cfg} command @@ -425,3 +425,5 @@ directive \IM{INFO-DIR-ENTRY} \cw{INFO-DIR-ENTRY} \IM{INFO-DIR-ENTRY} \cw{START-INFO-DIR-ENTRY} \IM{INFO-DIR-ENTRY} \cw{END-INFO-DIR-ENTRY} + +\IM{%N-upper} \c{%N} diff --git a/doc/input.but b/doc/input.but index 1c20acc..45cf08a 100644 --- a/doc/input.but +++ b/doc/input.but @@ -362,8 +362,8 @@ This is likely to cost \u20AC{EUR\_}2500 at least. If you read it in other formats, you may see different results. -\S{input-xref} \i\c{\\k} and \i\c{\\K}: \ii{Cross-references} to -other sections +\S{input-xref} \i\c{\\k} and \I{\\K-upper}\c{\\K}: +\ii{Cross-references} to other sections \K{intro-features} mentions that Halibut \I{section numbers}numbers the sections of your document automatically, and can generate @@ -844,8 +844,8 @@ So now you know. } -\S{input-sections} \i\c{\\C}, \i\c{\\H}, \i\c{\\S}, \i\c{\\A}, -\i\c{\\U}: Chapter and \i{section headings} +\S{input-sections} \I{\\C-upper}\c{\\C}, \i\c{\\H}, \i\c{\\S}, +\i\c{\\A}, \I{\\U-upper}\c{\\U}: Chapter and \i{section headings} \K{intro-features} mentions that Halibut \I{section numbering}numbers the sections of your document automatically, and @@ -1020,10 +1020,10 @@ If you need your document to refer to other documents (research papers, books, websites, whatever), you might find a bibliography feature useful. -You can define a bibliography entry using the \i\c{\\B} command. This -looks very like the \c{\\C} command and friends: it expects a -keyword in braces, followed by some text describing the document -being referred to. For example: +You can define a bibliography entry using the \I{\\B-upper}\c{\\B} +command. This looks very like the \c{\\C} command and friends: it +expects a keyword in braces, followed by some text describing the +document being referred to. For example: \c \B{freds-book} \q{The Taming Of The Mongoose}, by Fred Bloggs. \c Published by Paperjam & Notoner, 1993. @@ -1126,8 +1126,8 @@ appear emphasised, you must say so explicitly using \c{\\IM}; see Sometimes you might want to index a term which is not explicitly mentioned, but which is highly relevant to the text and you think that somebody looking up that term in the index might find it useful -to be directed here. To do this you can use the \i\c{\\I} command, -to create an \i{\e{invisible} index tag}: +to be directed here. To do this you can use the \I{\\I-upper}\c{\\I} +command, to create an \i{\e{invisible} index tag}: \c If your printer runs out of toner, \I{replacing toner \c cartridge}here is what to do: @@ -1232,6 +1232,39 @@ default one (typically \c{\\IM\{foo\}\_foo}, although it might be Halibut discards its default implicit one, and you must then specify that one explicitly as well if you wanted to keep it. +\S{input-index-case} Indexing terms that differ only in case + +The \e{tags} you use to define an index term (that is, the text in +the braces after \c{\\i}, \c{\\I} and \c{\\IM}) are treated +case-insensitively by Halibut. So if, as in this manual itself, you +need two index terms that differ only in case, doing this will not +work: + +\c The \i\c{\\c} command defines computer code. +\c +\c The \i\c{\\C} command defines a chapter. + +Halibut will treat these terms as the same, and will fold the two +sets of references into one combined list (although it will warn you +that it is doing this). The idea is to ensure that people who forget +to use \c{\\ii} find out about it rather than Halibut silently +generating a bad index; checking an index for errors is very hard +work, so Halibut tries to avoid errors in the first place as much as +it can. + +If you do come across this situation, you will need to define two +distinguishable index terms. What I did in this manual was something +like this: + +\c The \i\c{\\c} command defines computer code. +\c +\c The \I{\\C-upper}\c{\\C} command defines a chapter. +\c +\c \IM{\\C-upper} \c{\\C} + +The effect of this will be two separate index entries, one reading +\c{\\c} and the other reading \c{\\C}, pointing to the right places. + \H{input-config} \ii{Configuring} Halibut Halibut uses the \i\c{\\cfg} command to allow you to configure various diff --git a/doc/output.but b/doc/output.but index 5ef0170..9b5af88 100644 --- a/doc/output.but +++ b/doc/output.but @@ -363,7 +363,7 @@ cent sign, you can write \c{%%}.) The formatting commands used in this template are: -\dt \i\c{%N} +\dt \I{%N-upper}\c{%N} \dd Expands to the visible title of the section, with white space removed. So in a chapter declared as \q{\cw{\\C\{fish\} Catching diff --git a/error.c b/error.c index 350e71a..879d081 100644 --- a/error.c +++ b/error.c @@ -18,7 +18,7 @@ static void do_error(int code, va_list ap) { char c; int i, j; char *sp, *sp2; - wchar_t *wsp; + wchar_t *wsp, *wsp2; filepos fpos, fpos2; int flags; @@ -142,6 +142,20 @@ static void do_error(int code, va_list ap) { sprintf(error, "unable to nest index markings"); flags = FILEPOS; break; + case err_indexcase: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = utoa_locale_dup(wsp); + fpos2 = *va_arg(ap, filepos *); + wsp2 = va_arg(ap, wchar_t *); + sp2 = utoa_locale_dup(wsp2); + sprintf(error, "warning: index tag `%.200s' used with ", sp); + sprintf(error + strlen(error), "different case (`%.200s') at %s:%d", + sp2, fpos2.filename, fpos2.line); + flags = FILEPOS; + sfree(sp); + sfree(sp2); + break; case err_nosuchkw: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); diff --git a/halibut.h b/halibut.h index 4c9a745..01cbf3e 100644 --- a/halibut.h +++ b/halibut.h @@ -226,6 +226,7 @@ enum { err_missingrbrace2, /* unclosed braces at end of file */ err_nestedstyles, /* unable to nest text styles */ err_nestedindex, /* unable to nest `\i' thingys */ + err_indexcase, /* two \i differing only in case */ err_nosuchkw, /* unresolved cross-reference */ err_multiBR, /* multiple \BRs on same keyword */ err_nosuchidxtag, /* \IM on unknown index tag (warning) */ diff --git a/index.c b/index.c index 5dde59f..628934e 100644 --- a/index.c +++ b/index.c @@ -109,8 +109,6 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text, t->implicit_text = text; t->implicit_fpos = *fpos; } else { - sfree(t); - t = existing; if (!is_explicit) { /* * An implicit \IM for a tag that's had an implicit @@ -119,12 +117,25 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text, * differences. And check the tag for case match * against the existing tag, likewise. */ + + /* + * Check the tag against its previous occurrence to + * see if the cases match. + */ + if (ustrcmp(t->name, existing->name)) { + error(err_indexcase, fpos, t->name, + &existing->implicit_fpos, existing->name); + } + + sfree(t); } else { /* * An explicit \IM added to a valid tag. In * particular, this removes the implicit \IM if * present. */ + sfree(t); + t = existing; if (t->implicit_text) { free_word_list(t->implicit_text); t->implicit_text = NULL; diff --git a/misc.c b/misc.c index 0d488d4..a20c5b4 100644 --- a/misc.c +++ b/misc.c @@ -126,7 +126,7 @@ static int compare_wordlists_literally(word *a, word *b) { } else { wchar_t *ap = a->text, *bp = b->text; while (*ap && *bp) { - wchar_t ac = utolower(*ap), bc = utolower(*bp); + wchar_t ac = *ap, bc = *bp; if (ac != bc) return (ac < bc ? -1 : +1); if (!*++ap && a->next && a->next->type == t && !a->next->alt) -- 2.11.0