Rename Buttress to Halibut. I _think_ I've caught everything in this pass.
[sgt/halibut] / index.c
CommitLineData
d7482997 1/*
2 * index.c: create and collate index data structures
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include "halibut.h"
8
9static int compare_tags(void *av, void *bv);
10static int compare_entries(void *av, void *bv);
11
12indexdata *make_index(void) {
13 indexdata *ret = mknew(indexdata);
14 ret->tags = newtree234(compare_tags);
15 ret->entries = newtree234(compare_entries);
16 return ret;
17}
18
19static indextag *make_indextag(void) {
20 indextag *ret = mknew(indextag);
21 ret->name = NULL;
22 ret->implicit_text = NULL;
23 ret->explicit_texts = NULL;
24 ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
25 ret->refs = NULL;
26 return ret;
27}
28
29static int compare_tags(void *av, void *bv) {
30 indextag *a = (indextag *)av, *b = (indextag *)bv;
31 return ustricmp(a->name, b->name);
32}
33
34static int compare_to_find_tag(void *av, void *bv) {
35 wchar_t *a = (wchar_t *)av;
36 indextag *b = (indextag *)bv;
37 return ustricmp(a, b->name);
38}
39
40static int compare_entries(void *av, void *bv) {
41 indexentry *a = (indexentry *)av, *b = (indexentry *)bv;
42 return compare_wordlists(a->text, b->text);
43}
44
45/*
46 * Back-end utility: find the indextag with a given name.
47 */
48indextag *index_findtag(indexdata *idx, wchar_t *name) {
49 return find234(idx->tags, name, compare_to_find_tag);
50}
51
52/*
53 * Add a \IM. `tags' points to a zero-terminated chain of
54 * zero-terminated strings ("first\0second\0thirdandlast\0\0").
55 * `text' points to a word list.
56 *
57 * Guarantee on calling sequence: all implicit merges are given
58 * before the explicit ones.
59 */
60void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
61 indextag *t, *existing;
62
63 /*
64 * FIXME: want to warn on overlapping source sets.
65 */
66 for (; *tags; tags = uadv(tags)) {
67 t = make_indextag();
68 t->name = tags;
69 existing = add234(idx->tags, t);
70 if (existing == t) {
71 /*
72 * Duplicate this so we can free it independently.
73 */
74 t->name = ustrdup(tags);
75
76 /*
77 * Every tag has an implicit \IM. So if this tag
78 * doesn't exist and we're explicit, then we should
79 * warn (and drop it, since it won't be referenced).
80 */
81 if (is_explicit) {
82 error(err_nosuchidxtag, tags);
83 continue;
84 }
85
86 /*
87 * Otherwise, this is a new tag with an implicit \IM.
88 */
89 t->implicit_text = text;
90 } else {
91 sfree(t);
92 t = existing;
93 if (!is_explicit) {
94 /*
95 * An implicit \IM for a tag that's had an implicit
96 * \IM before. FIXME: we should check the text
97 * against the existing text and warn on
98 * differences. And check the tag for case match
99 * against the existing tag, likewise.
100 */
101 } else {
102 /*
103 * An explicit \IM added to a valid tag. In
104 * particular, this removes the implicit \IM if
105 * present.
106 */
107 if (t->implicit_text) {
108 free_word_list(t->implicit_text);
109 t->implicit_text = NULL;
110 }
111 if (t->nexplicit >= t->explicit_size) {
112 t->explicit_size = t->nexplicit + 8;
113 t->explicit_texts = resize(t->explicit_texts,
114 t->explicit_size);
115 }
116 t->explicit_texts[t->nexplicit++] = text;
117 }
118 }
119 }
120}
121
122/*
123 * Build the final-form index. We now have every tag, with every
124 * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
125 * the RHSes of the \IMs, and sort by final form, and decorate the
126 * entries in the original 2-3 tree with pointers to the RHS
127 * entries.
128 */
129void build_index(indexdata *i) {
130 indextag *t;
131 word **ta;
132 int ti;
133 int j;
134
135 for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
136 if (t->implicit_text) {
137 t->nrefs = 1;
138 ta = &t->implicit_text;
139 } else {
140 t->nrefs = t->nexplicit;
141 ta = t->explicit_texts;
142 }
143 if (t->nrefs) {
144 t->refs = mknewa(indexentry *, t->nrefs);
145 for (j = 0; j < t->nrefs; j++) {
146 indexentry *ent = mknew(indexentry);
147 ent->text = *ta++;
148 t->refs[j] = add234(i->entries, ent);
149 if (t->refs[j] != ent) /* duplicate */
150 sfree(ent);
151 }
152 }
153 }
154}
155
156void cleanup_index(indexdata *i) {
157 indextag *t;
158 indexentry *ent;
159 int ti;
160
161 for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
162 sfree(t->name);
163 free_word_list(t->implicit_text);
164 sfree(t->explicit_texts);
165 sfree(t->refs);
166 sfree(t);
167 }
168 freetree234(i->tags);
169 for (ti = 0; (ent = (indexentry *)index234(i->entries, ti))!=NULL; ti++) {
170 sfree(ent);
171 }
172 freetree234(i->entries);
173 sfree(i);
174}
175
176static void dbg_prtwordlist(int level, word *w);
177static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text);
178
179void index_debug(indexdata *i) {
180 indextag *t;
181 indexentry *y;
182 int ti;
183 int j;
184
185 printf("\nINDEX TAGS\n==========\n\n");
186 for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
187 printf("\n");
188 if (t->implicit_text)
189 dbg_prtmerge(0, t->name, t->implicit_text);
190 for (j = 0; j < t->nexplicit; j++)
191 dbg_prtmerge(1, t->name, t->explicit_texts[j]);
192 }
193
194 printf("\nINDEX ENTRIES\n=============\n\n");
195 for (ti = 0; (y = (indexentry *)index234(i->entries, ti)) != NULL; ti++) {
196 printf("\n");
197 printf("{\n");
198 dbg_prtwordlist(1, y->text);
199 printf("}\n");
200 }
201}
202
203static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
204 printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
205 for (; *tag; tag++)
206 putchar(*tag);
207 printf("\" {\n");
208 dbg_prtwordlist(1, text);
209 printf("}\n");
210}
211
212static void dbg_prtwordlist(int level, word *w) {
213 for (; w; w = w->next) {
214 wchar_t *wp;
215 printf("%*sword %d ", level*4, "", w->type);
216 if (w->text) {
217 printf("\"");
218 for (wp = w->text; *wp; wp++)
219 putchar(*wp);
220 printf("\"");
221 } else
222 printf("(no text)");
223 if (w->alt) {
224 printf(" alt = {\n");
225 dbg_prtwordlist(level+1, w->alt);
226 printf("%*s}", level*4, "");
227 }
228 printf("\n");
229 }
230}