2 * index.c: create and collate index data structures
9 static int compare_tags(void *av
, void *bv
);
10 static int compare_entries(void *av
, void *bv
);
12 indexdata
*make_index(void) {
13 indexdata
*ret
= mknew(indexdata
);
14 ret
->tags
= newtree234(compare_tags
);
15 ret
->entries
= newtree234(compare_entries
);
19 static indextag
*make_indextag(void) {
20 indextag
*ret
= mknew(indextag
);
22 ret
->implicit_text
= NULL
;
23 ret
->explicit_texts
= NULL
;
24 ret
->nexplicit
= ret
->explicit_size
= ret
->nrefs
= 0;
29 static int compare_tags(void *av
, void *bv
) {
30 indextag
*a
= (indextag
*)av
, *b
= (indextag
*)bv
;
31 return ustricmp(a
->name
, b
->name
);
34 static int compare_to_find_tag(void *av
, void *bv
) {
35 wchar_t *a
= (wchar_t *)av
;
36 indextag
*b
= (indextag
*)bv
;
37 return ustricmp(a
, b
->name
);
40 static int compare_entries(void *av
, void *bv
) {
41 indexentry
*a
= (indexentry
*)av
, *b
= (indexentry
*)bv
;
42 return compare_wordlists(a
->text
, b
->text
);
46 * Back-end utility: find the indextag with a given name.
48 indextag
*index_findtag(indexdata
*idx
, wchar_t *name
) {
49 return find234(idx
->tags
, name
, compare_to_find_tag
);
53 * Add a \IM. `tags' points to a zero-terminated chain of
54 * zero-terminated strings ("first\0second\0thirdandlast\0\0").
55 * `text' points to a word list.
57 * Guarantee on calling sequence: all implicit merges are given
58 * before the explicit ones.
60 void index_merge(indexdata
*idx
, int is_explicit
, wchar_t *tags
, word
*text
) {
61 indextag
*t
, *existing
;
64 * For an implicit merge, we want to remove all emphasis,
65 * because the chances are that the user didn't really want to
66 * index the term as emphasised.
71 for (w
= text
; w
; w
= w
->next
) {
72 if (w
->type
== word_Emph
)
73 w
->type
= word_Normal
;
74 else if (w
->type
== word_EmphSpace
)
75 w
->type
= word_WhiteSpace
;
76 else if (w
->type
== word_EmphQuote
)
82 * FIXME: want to warn on overlapping source sets.
84 for (; *tags
; tags
= uadv(tags
)) {
87 existing
= add234(idx
->tags
, t
);
90 * Duplicate this so we can free it independently.
92 t
->name
= ustrdup(tags
);
95 * Every tag has an implicit \IM. So if this tag
96 * doesn't exist and we're explicit, then we should
97 * warn (and drop it, since it won't be referenced).
100 error(err_nosuchidxtag
, tags
);
105 * Otherwise, this is a new tag with an implicit \IM.
107 t
->implicit_text
= text
;
113 * An implicit \IM for a tag that's had an implicit
114 * \IM before. FIXME: we should check the text
115 * against the existing text and warn on
116 * differences. And check the tag for case match
117 * against the existing tag, likewise.
121 * An explicit \IM added to a valid tag. In
122 * particular, this removes the implicit \IM if
125 if (t
->implicit_text
) {
126 free_word_list(t
->implicit_text
);
127 t
->implicit_text
= NULL
;
129 if (t
->nexplicit
>= t
->explicit_size
) {
130 t
->explicit_size
= t
->nexplicit
+ 8;
131 t
->explicit_texts
= resize(t
->explicit_texts
,
134 t
->explicit_texts
[t
->nexplicit
++] = text
;
141 * Build the final-form index. We now have every tag, with every
142 * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
143 * the RHSes of the \IMs, and sort by final form, and decorate the
144 * entries in the original 2-3 tree with pointers to the RHS
147 void build_index(indexdata
*i
) {
153 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
154 if (t
->implicit_text
) {
156 ta
= &t
->implicit_text
;
158 t
->nrefs
= t
->nexplicit
;
159 ta
= t
->explicit_texts
;
162 t
->refs
= mknewa(indexentry
*, t
->nrefs
);
163 for (j
= 0; j
< t
->nrefs
; j
++) {
164 indexentry
*ent
= mknew(indexentry
);
166 t
->refs
[j
] = add234(i
->entries
, ent
);
167 if (t
->refs
[j
] != ent
) /* duplicate */
174 void cleanup_index(indexdata
*i
) {
179 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
181 free_word_list(t
->implicit_text
);
182 sfree(t
->explicit_texts
);
186 freetree234(i
->tags
);
187 for (ti
= 0; (ent
= (indexentry
*)index234(i
->entries
, ti
))!=NULL
; ti
++) {
190 freetree234(i
->entries
);
194 static void dbg_prtwordlist(int level
, word
*w
);
195 static void dbg_prtmerge(int is_explicit
, wchar_t *tag
, word
*text
);
197 void index_debug(indexdata
*i
) {
203 printf("\nINDEX TAGS\n==========\n\n");
204 for (ti
= 0; (t
= (indextag
*)index234(i
->tags
, ti
)) != NULL
; ti
++) {
206 if (t
->implicit_text
)
207 dbg_prtmerge(0, t
->name
, t
->implicit_text
);
208 for (j
= 0; j
< t
->nexplicit
; j
++)
209 dbg_prtmerge(1, t
->name
, t
->explicit_texts
[j
]);
212 printf("\nINDEX ENTRIES\n=============\n\n");
213 for (ti
= 0; (y
= (indexentry
*)index234(i
->entries
, ti
)) != NULL
; ti
++) {
216 dbg_prtwordlist(1, y
->text
);
221 static void dbg_prtmerge(int is_explicit
, wchar_t *tag
, word
*text
) {
222 printf("\\IM: %splicit: \"", is_explicit ?
"ex" : "im");
226 dbg_prtwordlist(1, text
);
230 static void dbg_prtwordlist(int level
, word
*w
) {
231 for (; w
; w
= w
->next
) {
233 printf("%*sword %d ", level
*4, "", w
->type
);
236 for (wp
= w
->text
; *wp
; wp
++)
242 printf(" alt = {\n");
243 dbg_prtwordlist(level
+1, w
->alt
);
244 printf("%*s}", level
*4, "");