Add an error check for correct formatting in Deflate uncompressed
[sgt/halibut] / index.c
diff --git a/index.c b/index.c
index 3b1df51..628934e 100644 (file)
--- a/index.c
+++ b/index.c
@@ -10,17 +10,18 @@ static int compare_tags(void *av, void *bv);
 static int compare_entries(void *av, void *bv);
 
 indexdata *make_index(void) {
-    indexdata *ret = mknew(indexdata);
+    indexdata *ret = snew(indexdata);
     ret->tags = newtree234(compare_tags);
     ret->entries = newtree234(compare_entries);
     return ret;
 }
 
 static indextag *make_indextag(void) {
-    indextag *ret = mknew(indextag);
+    indextag *ret = snew(indextag);
     ret->name = NULL;
     ret->implicit_text = NULL;
     ret->explicit_texts = NULL;
+    ret->explicit_fpos = NULL;
     ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
     ret->refs = NULL;
     return ret;
@@ -57,10 +58,29 @@ indextag *index_findtag(indexdata *idx, wchar_t *name) {
  * Guarantee on calling sequence: all implicit merges are given
  * before the explicit ones.
  */
-void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
+void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text,
+                filepos *fpos) {
     indextag *t, *existing;
 
     /*
+     * For an implicit merge, we want to remove all emphasis, because
+     * the chances are that the user didn't really want to index the
+     * term as emphasised. NOTE(review): runs for explicit merges too.
+     */
+    {
+       word *w;
+
+       for (w = text; w; w = w->next) {
+           if (w->type == word_Emph)
+               w->type = word_Normal;
+           else if (w->type == word_EmphSpace)
+               w->type = word_WhiteSpace;
+           else if (w->type == word_EmphQuote)
+               w->type = word_Quote;
+       }
+    }
+
+    /*
      * FIXME: want to warn on overlapping source sets.
      */
     for (; *tags; tags = uadv(tags)) {
@@ -79,7 +99,7 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
             * warn (and drop it, since it won't be referenced).
             */
            if (is_explicit) {
-               error(err_nosuchidxtag, tags);
+               error(err_nosuchidxtag, fpos, tags);
                continue;
            }
 
@@ -87,9 +107,8 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
             * Otherwise, this is a new tag with an implicit \IM.
             */
            t->implicit_text = text;
+           t->implicit_fpos = *fpos;
        } else {
-           sfree(t);
-           t = existing;
            if (!is_explicit) {
                /*
                 * An implicit \IM for a tag that's had an implicit
@@ -98,22 +117,39 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
                 * differences. And check the tag for case match
                 * against the existing tag, likewise.
                 */
+
+               /*
+                * Check the tag against its previous occurrence to
+                * see if the cases match.
+                */
+               if (ustrcmp(t->name, existing->name)) {
+                   error(err_indexcase, fpos, t->name,
+                         &existing->implicit_fpos, existing->name);
+               }
+
+               sfree(t);
            } else {
                /*
                 * An explicit \IM added to a valid tag. In
                 * particular, this removes the implicit \IM if
                 * present.
                 */
+               sfree(t);
+               t = existing;
                if (t->implicit_text) {
                    free_word_list(t->implicit_text);
                    t->implicit_text = NULL;
                }
                if (t->nexplicit >= t->explicit_size) {
                    t->explicit_size = t->nexplicit + 8;
-                   t->explicit_texts = resize(t->explicit_texts,
-                                              t->explicit_size);
+                   t->explicit_texts = sresize(t->explicit_texts,
+                                               t->explicit_size, word *);
+                   t->explicit_fpos = sresize(t->explicit_fpos,
+                                              t->explicit_size, filepos);
                }
-               t->explicit_texts[t->nexplicit++] = text;
+               t->explicit_texts[t->nexplicit] = text;
+               t->explicit_fpos[t->nexplicit] = *fpos;
+               t->nexplicit++;
            }
        }
     }
@@ -129,6 +165,7 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
 void build_index(indexdata *i) {
     indextag *t;
     word **ta;
+    filepos *fa;
     int ti;
     int j;
 
@@ -136,15 +173,18 @@ void build_index(indexdata *i) {
        if (t->implicit_text) {
            t->nrefs = 1;
            ta = &t->implicit_text;
+           fa = &t->implicit_fpos;
        } else {
            t->nrefs = t->nexplicit;
            ta = t->explicit_texts;
+           fa = t->explicit_fpos;
        }
        if (t->nrefs) {
-           t->refs = mknewa(indexentry *, t->nrefs);
+           t->refs = snewn(t->nrefs, indexentry *);
            for (j = 0; j < t->nrefs; j++) {
-               indexentry *ent = mknew(indexentry);
+               indexentry *ent = snew(indexentry);
                ent->text = *ta++;
+               ent->fpos = *fa++;
                t->refs[j] = add234(i->entries, ent);
                if (t->refs[j] != ent)     /* duplicate */
                    sfree(ent);