2 * index.c: create and collate index data structures
9 static int compare_tags(void *av
, void *bv
);
10 static int compare_entries(void *av
, void *bv
);
12 indexdata
*make_index(void) {
13 indexdata
*ret
= mknew(indexdata
);
14 ret
->tags
= newtree234(compare_tags
);
15 ret
->entries
= newtree234(compare_entries
);
19 static indextag
*make_indextag(void) {
20 indextag
*ret
= mknew(indextag
);
22 ret
->implicit_text
= NULL
;
23 ret
->explicit_texts
= NULL
;
24 ret
->explicit_fpos
= NULL
;
25 ret
->nexplicit
= ret
->explicit_size
= ret
->nrefs
= 0;
30 static int compare_tags(void *av
, void *bv
) {
31 indextag
*a
= (indextag
*)av
, *b
= (indextag
*)bv
;
32 return ustricmp(a
->name
, b
->name
);
35 static int compare_to_find_tag(void *av
, void *bv
) {
36 wchar_t *a
= (wchar_t *)av
;
37 indextag
*b
= (indextag
*)bv
;
38 return ustricmp(a
, b
->name
);
41 static int compare_entries(void *av
, void *bv
) {
42 indexentry
*a
= (indexentry
*)av
, *b
= (indexentry
*)bv
;
43 return compare_wordlists(a
->text
, b
->text
);
47 * Back-end utility: find the indextag with a given name.
49 indextag
*index_findtag(indexdata
*idx
, wchar_t *name
) {
50 return find234(idx
->tags
, name
, compare_to_find_tag
);
54 * Add a \IM. `tags' points to a zero-terminated chain of
55 * zero-terminated strings ("first\0second\0thirdandlast\0\0").
56 * `text' points to a word list.
58 * Guarantee on calling sequence: all implicit merges are given
59 * before the explicit ones.
61 void index_merge(indexdata
*idx
, int is_explicit
, wchar_t *tags
, word
*text
,
63 indextag
*t
, *existing
;
66 * For an implicit merge, we want to remove all emphasis,
67 * because the chances are that the user didn't really want to
68 * index the term as emphasised.
73 for (w
= text
; w
; w
= w
->next
) {
74 if (w
->type
== word_Emph
)
75 w
->type
= word_Normal
;
76 else if (w
->type
== word_EmphSpace
)
77 w
->type
= word_WhiteSpace
;
78 else if (w
->type
== word_EmphQuote
)
84 * FIXME: want to warn on overlapping source sets.
86 for (; *tags
; tags
= uadv(tags
)) {
89 existing
= add234(idx
->tags
, t
);
92 * Duplicate this so we can free it independently.
94 t
->name
= ustrdup(tags
);
97 * Every tag has an implicit \IM. So if this tag
98 * doesn't exist and we're explicit, then we should
99 * warn (and drop it, since it won't be referenced).
102 error(err_nosuchidxtag
, tags
);
107 * Otherwise, this is a new tag with an implicit \IM.
109 t
->implicit_text
= text
;
110 t
->implicit_fpos
= *fpos
;
116 * An implicit \IM for a tag that's had an implicit
117 * \IM before. FIXME: we should check the text
118 * against the existing text and warn on
119 * differences. And check the tag for case match
120 * against the existing tag, likewise.
124 * An explicit \IM added to a valid tag. In
125 * particular, this removes the implicit \IM if
128 if (t
->implicit_text
) {
129 free_word_list(t
->implicit_text
);
130 t
->implicit_text
= NULL
;
132 if (t
->nexplicit
>= t
->explicit_size
) {
133 t
->explicit_size
= t
->nexplicit
+ 8;
134 t
->explicit_texts
= resize(t
->explicit_texts
,
136 t
->explicit_fpos
= resize(t
->explicit_fpos
,
139 t
->explicit_texts
[t
->nexplicit
] = text
;
140 t
->explicit_fpos
[t
->nexplicit
] = *fpos
;
148 * Build the final-form index. We now have every tag, with every
149 * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
150 * the RHSes of the \IMs, and sort by final form, and decorate the
151 * entries in the original 2-3 tree with pointers to the RHS
154 void build_index(indexdata
*i
) {
161 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
162 if (t
->implicit_text
) {
164 ta
= &t
->implicit_text
;
165 fa
= &t
->implicit_fpos
;
167 t
->nrefs
= t
->nexplicit
;
168 ta
= t
->explicit_texts
;
169 fa
= t
->explicit_fpos
;
172 t
->refs
= mknewa(indexentry
*, t
->nrefs
);
173 for (j
= 0; j
< t
->nrefs
; j
++) {
174 indexentry
*ent
= mknew(indexentry
);
177 t
->refs
[j
] = add234(i
->entries
, ent
);
178 if (t
->refs
[j
] != ent
) /* duplicate */
185 void cleanup_index(indexdata
*i
) {
190 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
192 free_word_list(t
->implicit_text
);
193 sfree(t
->explicit_texts
);
197 freetree234(i
->tags
);
198 for (ti
= 0; (ent
= (indexentry
*)index234(i
->entries
, ti
))!=NULL
; ti
++) {
201 freetree234(i
->entries
);
205 static void dbg_prtwordlist(int level
, word
*w
);
206 static void dbg_prtmerge(int is_explicit
, wchar_t *tag
, word
*text
);
208 void index_debug(indexdata
*i
) {
214 printf("\nINDEX TAGS\n==========\n\n");
215 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
217 if (t
->implicit_text
)
218 dbg_prtmerge(0, t
->name
, t
->implicit_text
);
219 for (j
= 0; j
< t
->nexplicit
; j
++)
220 dbg_prtmerge(1, t
->name
, t
->explicit_texts
[j
]);
223 printf("\nINDEX ENTRIES\n=============\n\n");
224 for (ti
= 0; (y
= (indexentry
*)index234(i
->entries
, ti
)) != NULL
; ti
++) {
227 dbg_prtwordlist(1, y
->text
);
232 static void dbg_prtmerge(int is_explicit
, wchar_t *tag
, word
*text
) {
233 printf("\\IM: %splicit: \"", is_explicit ?
"ex" : "im");
237 dbg_prtwordlist(1, text
);
241 static void dbg_prtwordlist(int level
, word
*w
) {
242 for (; w
; w
= w
->next
) {
244 printf("%*sword %d ", level
*4, "", w
->type
);
247 for (wp
= w
->text
; *wp
; wp
++)
253 printf(" alt = {\n");
254 dbg_prtwordlist(level
+1, w
->alt
);
255 printf("%*s}", level
*4, "");