From da090173a115e3450b23e6b84aa98972c4f1619e Mon Sep 17 00:00:00 2001
From: simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Date: Sun, 13 Jun 2004 14:57:25 +0000
Subject: [PATCH] The Halibut manual contained at least one instance of two
 index terms (intentionally) differing only in case, which were being silently
 folded into one by the case-insensitive index tag comparison. Halibut now
 warns in this situation (but then folds them anyway, which I think is better
 than silently generating an index containing many case-distinct forms of the
 same word - I imagine it's very easy to do that by mistake). The manual has
 been fixed to explicitly define distinct keywords (in the case I spotted and
 in five other cases picked up by the new warning!), and also documents this
 issue and how to work with it.

git-svn-id: svn://svn.tartarus.org/sgt/halibut@4279 cda61777-01e9-0310-a592-d414129be87e
---
 doc/index.but  | 12 +++++++-----
 doc/input.but  | 53 +++++++++++++++++++++++++++++++++++++++++++----------
 doc/output.but |  2 +-
 error.c        | 16 +++++++++++++++-
 halibut.h      |  1 +
 index.c        | 15 +++++++++++++--
 misc.c         |  2 +-
 7 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/doc/index.but b/doc/index.but
index f57f51b..53f9c9e 100644
--- a/doc/index.but
+++ b/doc/index.but
@@ -314,7 +314,7 @@ directive
 \IM{\\W} \c{\\W} command
 \IM{\\u} \c{\\u} command
 \IM{\\k} \c{\\k} command
-\IM{\\K} \c{\\K} command
+\IM{\\K-upper} \c{\\K} command
 \IM{\\#} \c{\\#} command
 \IM{\\b} \c{\\b} command
 \IM{\\n} \c{\\n} command
@@ -323,23 +323,23 @@ directive
 \IM{\\lcont} \c{\\lcont} command
 \IM{\\rule} \c{\\rule} command
 \IM{\\quote} \c{\\quote} command
-\IM{\\C} \c{\\C} command
+\IM{\\C-upper} \c{\\C} command
 \IM{\\H} \c{\\H} command
 \IM{\\S} \c{\\S} command
 \IM{\\A} \c{\\A} command
-\IM{\\U} \c{\\U} command
+\IM{\\U-upper} \c{\\U} command
 \IM{\\S2} \c{\\S2}, \c{\\S3} commands etc.
 \IM{\\title} \c{\\title} command
 \IM{\\copyright} \c{\\copyright} command
 \IM{\\versionid} \c{\\versionid} command
-\IM{\\B} \c{\\B} command
+\IM{\\B-upper} \c{\\B} command
 \IM{\\BR} \c{\\BR} command
 \IM{\\nocite} \c{\\nocite} command
 \IM{\\i} \c{\\i} command
 \IM{\\i\\c} \c{\\i\\c} combination
 \IM{\\i\\cw} \c{\\i\\cw} combination
 \IM{\\i\\e} \c{\\i\\e} combination
-\IM{\\I} \c{\\I} command
+\IM{\\I-upper} \c{\\I} command
 \IM{\\ii} \c{\\ii} command
 \IM{\\IM} \c{\\IM} command
 \IM{\\cfg} \c{\\cfg} command
@@ -425,3 +425,5 @@ directive
 \IM{INFO-DIR-ENTRY} \cw{INFO-DIR-ENTRY}
 \IM{INFO-DIR-ENTRY} \cw{START-INFO-DIR-ENTRY}
 \IM{INFO-DIR-ENTRY} \cw{END-INFO-DIR-ENTRY}
+
+\IM{%N-upper} \c{%N}
diff --git a/doc/input.but b/doc/input.but
index 1c20acc..45cf08a 100644
--- a/doc/input.but
+++ b/doc/input.but
@@ -362,8 +362,8 @@ This is likely to cost \u20AC{EUR\_}2500 at least.
 
 If you read it in other formats, you may see different results.
 
-\S{input-xref} \i\c{\\k} and \i\c{\\K}: \ii{Cross-references} to
-other sections
+\S{input-xref} \i\c{\\k} and \I{\\K-upper}\c{\\K}:
+\ii{Cross-references} to other sections
 
 \K{intro-features} mentions that Halibut \I{section numbers}numbers
 the sections of your document automatically, and can generate
@@ -844,8 +844,8 @@ So now you know.
 
 }
 
-\S{input-sections} \i\c{\\C}, \i\c{\\H}, \i\c{\\S}, \i\c{\\A},
-\i\c{\\U}: Chapter and \i{section headings}
+\S{input-sections} \I{\\C-upper}\c{\\C}, \i\c{\\H}, \i\c{\\S},
+\i\c{\\A}, \I{\\U-upper}\c{\\U}: Chapter and \i{section headings}
 
 \K{intro-features} mentions that Halibut \I{section
 numbering}numbers the sections of your document automatically, and
@@ -1020,10 +1020,10 @@ If you need your document to refer to other documents (research
 papers, books, websites, whatever), you might find a bibliography
 feature useful.
 
-You can define a bibliography entry using the \i\c{\\B} command. This
-looks very like the \c{\\C} command and friends: it expects a
-keyword in braces, followed by some text describing the document
-being referred to. For example:
+You can define a bibliography entry using the \I{\\B-upper}\c{\\B}
+command. This looks very like the \c{\\C} command and friends: it
+expects a keyword in braces, followed by some text describing the
+document being referred to. For example:
 
 \c \B{freds-book} \q{The Taming Of The Mongoose}, by Fred Bloggs.
 \c Published by Paperjam & Notoner, 1993.
@@ -1126,8 +1126,8 @@ appear emphasised, you must say so explicitly using \c{\\IM}; see
 Sometimes you might want to index a term which is not explicitly
 mentioned, but which is highly relevant to the text and you think
 that somebody looking up that term in the index might find it useful
-to be directed here. To do this you can use the \i\c{\\I} command,
-to create an \i{\e{invisible} index tag}:
+to be directed here. To do this you can use the \I{\\I-upper}\c{\\I}
+command, to create an \i{\e{invisible} index tag}:
 
 \c If your printer runs out of toner, \I{replacing toner
 \c cartridge}here is what to do:
@@ -1232,6 +1232,39 @@ default one (typically \c{\\IM\{foo\}\_foo}, although it might be
 Halibut discards its default implicit one, and you must then specify
 that one explicitly as well if you wanted to keep it.
 
+\S{input-index-case} Indexing terms that differ only in case
+
+The \e{tags} you use to define an index term (that is, the text in
+the braces after \c{\\i}, \c{\\I} and \c{\\IM}) are treated
+case-insensitively by Halibut. So if, as in this manual itself, you
+need two index terms that differ only in case, doing this will not
+work:
+
+\c The \i\c{\\c} command defines computer code.
+\c
+\c The \i\c{\\C} command defines a chapter.
+
+Halibut will treat these terms as the same, and will fold the two
+sets of references into one combined list (although it will warn you
+that it is doing this). The idea is to ensure that people who forget
+to use \c{\\ii} (say, on a word capitalised at the start of a
+sentence) find out about it rather than Halibut silently generating
+a bad index; checking an index for errors is very hard work, so
+Halibut tries to avoid errors in the first place as much as it can.
+
+If you do come across this situation, you will need to define two
+distinguishable index terms. What I did in this manual was something
+like this:
+
+\c The \i\c{\\c} command defines computer code.
+\c
+\c The \I{\\C-upper}\c{\\C} command defines a chapter.
+\c
+\c \IM{\\C-upper} \c{\\C}
+
+The effect of this will be two separate index entries, one reading
+\c{\\c} and the other reading \c{\\C}, pointing to the right places.
+
 \H{input-config} \ii{Configuring} Halibut
 
 Halibut uses the \i\c{\\cfg} command to allow you to configure various
diff --git a/doc/output.but b/doc/output.but
index 5ef0170..9b5af88 100644
--- a/doc/output.but
+++ b/doc/output.but
@@ -363,7 +363,7 @@ cent sign, you can write \c{%%}.)
 
 The formatting commands used in this template are:
 
-\dt \i\c{%N}
+\dt \I{%N-upper}\c{%N}
 
 \dd Expands to the visible title of the section, with white space
 removed. So in a chapter declared as \q{\cw{\\C\{fish\} Catching
diff --git a/error.c b/error.c
index 350e71a..879d081 100644
--- a/error.c
+++ b/error.c
@@ -18,7 +18,7 @@ static void do_error(int code, va_list ap) {
     char c;
     int i, j;
     char *sp, *sp2;
-    wchar_t *wsp;
+    wchar_t *wsp, *wsp2;
     filepos fpos, fpos2;
     int flags;
 
@@ -142,6 +142,20 @@ static void do_error(int code, va_list ap) {
 	sprintf(error, "unable to nest index markings");
 	flags = FILEPOS;
 	break;
+      case err_indexcase:
+	fpos = *va_arg(ap, filepos *);
+	wsp = va_arg(ap, wchar_t *);
+	sp = utoa_locale_dup(wsp);
+	fpos2 = *va_arg(ap, filepos *);
+	wsp2 = va_arg(ap, wchar_t *);
+	sp2 = utoa_locale_dup(wsp2);
+	sprintf(error, "warning: index tag `%.200s' used with ", sp);
+	sprintf(error + strlen(error), "different case (`%.200s') at %s:%d",
+		sp2, fpos2.filename, fpos2.line);
+	flags = FILEPOS;
+	sfree(sp);
+	sfree(sp2);
+	break;
       case err_nosuchkw:
 	fpos = *va_arg(ap, filepos *);
 	wsp = va_arg(ap, wchar_t *);
diff --git a/halibut.h b/halibut.h
index 4c9a745..01cbf3e 100644
--- a/halibut.h
+++ b/halibut.h
@@ -226,6 +226,7 @@ enum {
     err_missingrbrace2,		       /* unclosed braces at end of file */
     err_nestedstyles,		       /* unable to nest text styles */
     err_nestedindex,		       /* unable to nest `\i' thingys */
+    err_indexcase,		       /* two \i differing only in case */
     err_nosuchkw,		       /* unresolved cross-reference */
     err_multiBR,		       /* multiple \BRs on same keyword */
     err_nosuchidxtag,		       /* \IM on unknown index tag (warning) */
diff --git a/index.c b/index.c
index 5dde59f..628934e 100644
--- a/index.c
+++ b/index.c
@@ -109,8 +109,6 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text,
 	    t->implicit_text = text;
 	    t->implicit_fpos = *fpos;
 	} else {
-	    sfree(t);
-	    t = existing;
 	    if (!is_explicit) {
  		/*
 		 * An implicit \IM for a tag that's had an implicit
@@ -119,12 +117,25 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text,
 		 * differences. And check the tag for case match
 		 * against the existing tag, likewise.
 		 */
+
+		/*
+		 * Check the tag against its previous occurrence to
+		 * see if the cases match.
+		 */
+		if (ustrcmp(t->name, existing->name)) {
+		    error(err_indexcase, fpos, t->name,
+			  &existing->implicit_fpos, existing->name);
+		}
+
+		sfree(t);
 	    } else {
 		/*
 		 * An explicit \IM added to a valid tag. In
 		 * particular, this removes the implicit \IM if
 		 * present.
 		 */
+		sfree(t);
+		t = existing;
 		if (t->implicit_text) {
 		    free_word_list(t->implicit_text);
 		    t->implicit_text = NULL;
diff --git a/misc.c b/misc.c
index 0d488d4..a20c5b4 100644
--- a/misc.c
+++ b/misc.c
@@ -126,7 +126,7 @@ static int compare_wordlists_literally(word *a, word *b) {
 	} else {
 	    wchar_t *ap = a->text, *bp = b->text;
 	    while (*ap && *bp) {
-		wchar_t ac = utolower(*ap), bc = utolower(*bp);
+		wchar_t ac = *ap, bc = *bp;
 		if (ac != bc)
 		    return (ac < bc ? -1 : +1);
 		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
-- 
2.11.0