2 * index.c: create and collate index data structures
9 static int compare_tags(void *av
, void *bv
);
10 static int compare_entries(void *av
, void *bv
);
12 indexdata
*make_index(void) {
13 indexdata
*ret
= snew(indexdata
);
14 ret
->tags
= newtree234(compare_tags
);
15 ret
->entries
= newtree234(compare_entries
);
19 static indextag
*make_indextag(void) {
20 indextag
*ret
= snew(indextag
);
22 ret
->implicit_text
= NULL
;
23 ret
->explicit_texts
= NULL
;
24 ret
->explicit_fpos
= NULL
;
25 ret
->nexplicit
= ret
->explicit_size
= ret
->nrefs
= 0;
30 static int compare_tags(void *av
, void *bv
) {
31 indextag
*a
= (indextag
*)av
, *b
= (indextag
*)bv
;
32 return ustricmp(a
->name
, b
->name
);
35 static int compare_to_find_tag(void *av
, void *bv
) {
36 wchar_t *a
= (wchar_t *)av
;
37 indextag
*b
= (indextag
*)bv
;
38 return ustricmp(a
, b
->name
);
41 static int compare_entries(void *av
, void *bv
) {
42 indexentry
*a
= (indexentry
*)av
, *b
= (indexentry
*)bv
;
43 return compare_wordlists(a
->text
, b
->text
);
47 * Back-end utility: find the indextag with a given name.
49 indextag
*index_findtag(indexdata
*idx
, wchar_t *name
) {
50 return find234(idx
->tags
, name
, compare_to_find_tag
);
54 * Add a \IM. `tags' points to a zero-terminated chain of
55 * zero-terminated strings ("first\0second\0thirdandlast\0\0").
56 * `text' points to a word list.
58 * Guarantee on calling sequence: all implicit merges are given
59 * before the explicit ones.
61 void index_merge(indexdata
*idx
, int is_explicit
, wchar_t *tags
, word
*text
,
63 indextag
*t
, *existing
;
66 * For an implicit merge, we want to remove all emphasis,
67 * because the chances are that the user didn't really want to
68 * index the term as emphasised.
73 for (w
= text
; w
; w
= w
->next
) {
74 if (w
->type
== word_Emph
)
75 w
->type
= word_Normal
;
76 else if (w
->type
== word_EmphSpace
)
77 w
->type
= word_WhiteSpace
;
78 else if (w
->type
== word_EmphQuote
)
84 * FIXME: want to warn on overlapping source sets.
86 for (; *tags
; tags
= uadv(tags
)) {
89 existing
= add234(idx
->tags
, t
);
92 * Duplicate this so we can free it independently.
94 t
->name
= ustrdup(tags
);
97 * Every tag has an implicit \IM. So if this tag
98 * doesn't exist and we're explicit, then we should
99 * warn (and drop it, since it won't be referenced).
102 error(err_nosuchidxtag
, fpos
, tags
);
107 * Otherwise, this is a new tag with an implicit \IM.
109 t
->implicit_text
= text
;
110 t
->implicit_fpos
= *fpos
;
114 * An implicit \IM for a tag that's had an implicit
115 * \IM before. FIXME: we should check the text
116 * against the existing text and warn on
117 * differences. And check the tag for case match
118 * against the existing tag, likewise.
122 * Check the tag against its previous occurrence to
123 * see if the cases match.
125 if (ustrcmp(t
->name
, existing
->name
)) {
126 error(err_indexcase
, fpos
, t
->name
,
127 &existing
->implicit_fpos
, existing
->name
);
133 * An explicit \IM added to a valid tag. In
134 * particular, this removes the implicit \IM if
139 if (t
->implicit_text
) {
140 free_word_list(t
->implicit_text
);
141 t
->implicit_text
= NULL
;
143 if (t
->nexplicit
>= t
->explicit_size
) {
144 t
->explicit_size
= t
->nexplicit
+ 8;
145 t
->explicit_texts
= sresize(t
->explicit_texts
,
146 t
->explicit_size
, word
*);
147 t
->explicit_fpos
= sresize(t
->explicit_fpos
,
148 t
->explicit_size
, filepos
);
150 t
->explicit_texts
[t
->nexplicit
] = text
;
151 t
->explicit_fpos
[t
->nexplicit
] = *fpos
;
159 * Build the final-form index. We now have every tag, with every
160 * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
161 * the RHSes of the \IMs, and sort by final form, and decorate the
162 * entries in the original 2-3 tree with pointers to the RHS
165 void build_index(indexdata
*i
) {
172 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
173 if (t
->implicit_text
) {
175 ta
= &t
->implicit_text
;
176 fa
= &t
->implicit_fpos
;
178 t
->nrefs
= t
->nexplicit
;
179 ta
= t
->explicit_texts
;
180 fa
= t
->explicit_fpos
;
183 t
->refs
= snewn(t
->nrefs
, indexentry
*);
184 for (j
= 0; j
< t
->nrefs
; j
++) {
185 indexentry
*ent
= snew(indexentry
);
188 t
->refs
[j
] = add234(i
->entries
, ent
);
189 if (t
->refs
[j
] != ent
) /* duplicate */
196 void cleanup_index(indexdata
*i
) {
201 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
203 free_word_list(t
->implicit_text
);
204 sfree(t
->explicit_texts
);
208 freetree234(i
->tags
);
209 for (ti
= 0; (ent
= (indexentry
*)index234(i
->entries
, ti
))!=NULL
; ti
++) {
212 freetree234(i
->entries
);
216 static void dbg_prtwordlist(int level
, word
*w
);
217 static void dbg_prtmerge(int is_explicit
, wchar_t *tag
, word
*text
);
219 void index_debug(indexdata
*i
) {
225 printf("\nINDEX TAGS\n==========\n\n");
226 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
228 if (t
->implicit_text
)
229 dbg_prtmerge(0, t
->name
, t
->implicit_text
);
230 for (j
= 0; j
< t
->nexplicit
; j
++)
231 dbg_prtmerge(1, t
->name
, t
->explicit_texts
[j
]);
234 printf("\nINDEX ENTRIES\n=============\n\n");
235 for (ti
= 0; (y
= (indexentry
*)index234(i
->entries
, ti
)) != NULL
; ti
++) {
238 dbg_prtwordlist(1, y
->text
);
243 static void dbg_prtmerge(int is_explicit
, wchar_t *tag
, word
*text
) {
244 printf("\\IM: %splicit: \"", is_explicit ?
"ex" : "im");
248 dbg_prtwordlist(1, text
);
252 static void dbg_prtwordlist(int level
, word
*w
) {
253 for (; w
; w
= w
->next
) {
255 printf("%*sword %d ", level
*4, "", w
->type
);
258 for (wp
= w
->text
; *wp
; wp
++)
264 printf(" alt = {\n");
265 dbg_prtwordlist(level
+1, w
->alt
);
266 printf("%*s}", level
*4, "");