mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* index.c: create and collate index data structures
	3	*/
	4
	5	#include <stdio.h>
	6	#include <stdlib.h>
	7	#include "halibut.h"
	8
	9	static int compare_tags(void av, void bv);
	10	static int compare_entries(void av, void bv);
	11
	12	indexdata *make_index(void) {
	13	indexdata *ret = snew(indexdata);
	14	ret->tags = newtree234(compare_tags);
	15	ret->entries = newtree234(compare_entries);
	16	return ret;
	17	}
	18
	19	static indextag *make_indextag(void) {
	20	indextag *ret = snew(indextag);
	21	ret->name = NULL;
	22	ret->implicit_text = NULL;
	23	ret->explicit_texts = NULL;
	24	ret->explicit_fpos = NULL;
	25	ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
	26	ret->refs = NULL;
	27	return ret;
	28	}
	29
	30	static int compare_tags(void av, void bv) {
	31	indextag a = (indextag )av, b = (indextag )bv;
	32	return ustricmp(a->name, b->name);
	33	}
	34
	35	static int compare_to_find_tag(void av, void bv) {
	36	wchar_t a = (wchar_t )av;
	37	indextag b = (indextag )bv;
	38	return ustricmp(a, b->name);
	39	}
	40
	41	static int compare_entries(void av, void bv) {
	42	indexentry a = (indexentry )av, b = (indexentry )bv;
	43	return compare_wordlists(a->text, b->text);
	44	}
	45
	46	/*
	47	* Back-end utility: find the indextag with a given name.
	48	*/
	49	indextag index_findtag(indexdata idx, wchar_t *name) {
	50	return find234(idx->tags, name, compare_to_find_tag);
	51	}
	52
	53	/*
	54	* Add a \IM. `tags' points to a zero-terminated chain of
	55	* zero-terminated strings ("first\0second\0thirdandlast\0\0").
	56	* `text' points to a word list.
	57	*
	58	* Guarantee on calling sequence: all implicit merges are given
	59	* before the explicit ones.
	60	*/
	61	void index_merge(indexdata idx, int is_explicit, wchar_t tags, word *text,
	62	filepos *fpos) {
	63	indextag t, existing;
	64
	65	/*
	66	* For an implicit merge, we want to remove all emphasis,
	67	* because the chances are that the user didn't really want to
	68	* index the term as emphasised.
	69	*/
	70	{
	71	word *w;
	72
	73	for (w = text; w; w = w->next) {
	74	if (w->type == word_Emph)
	75	w->type = word_Normal;
	76	else if (w->type == word_EmphSpace)
	77	w->type = word_WhiteSpace;
	78	else if (w->type == word_EmphQuote)
	79	w->type = word_Quote;
	80	}
	81	}
	82
	83	/*
	84	* FIXME: want to warn on overlapping source sets.
	85	*/
	86	for (; *tags; tags = uadv(tags)) {
	87	t = make_indextag();
	88	t->name = tags;
	89	existing = add234(idx->tags, t);
	90	if (existing == t) {
	91	/*
	92	* Duplicate this so we can free it independently.
	93	*/
	94	t->name = ustrdup(tags);
	95
	96	/*
	97	* Every tag has an implicit \IM. So if this tag
	98	* doesn't exist and we're explicit, then we should
	99	* warn (and drop it, since it won't be referenced).
	100	*/
	101	if (is_explicit) {
	102	error(err_nosuchidxtag, fpos, tags);
	103	continue;
	104	}
	105
	106	/*
	107	* Otherwise, this is a new tag with an implicit \IM.
	108	*/
	109	t->implicit_text = text;
	110	t->implicit_fpos = *fpos;
	111	} else {
	112	if (!is_explicit) {
	113	/*
	114	* An implicit \IM for a tag that's had an implicit
	115	* \IM before. FIXME: we should check the text
	116	* against the existing text and warn on
	117	* differences. And check the tag for case match
	118	* against the existing tag, likewise.
	119	*/
	120
	121	/*
	122	* Check the tag against its previous occurrence to
	123	* see if the cases match.
	124	*/
	125	if (ustrcmp(t->name, existing->name)) {
	126	error(err_indexcase, fpos, t->name,
	127	&existing->implicit_fpos, existing->name);
	128	}
	129
	130	sfree(t);
	131	} else {
	132	/*
	133	* An explicit \IM added to a valid tag. In
	134	* particular, this removes the implicit \IM if
	135	* present.
	136	*/
	137	sfree(t);
	138	t = existing;
	139	if (t->implicit_text) {
	140	free_word_list(t->implicit_text);
	141	t->implicit_text = NULL;
	142	}
	143	if (t->nexplicit >= t->explicit_size) {
	144	t->explicit_size = t->nexplicit + 8;
	145	t->explicit_texts = sresize(t->explicit_texts,
	146	t->explicit_size, word *);
	147	t->explicit_fpos = sresize(t->explicit_fpos,
	148	t->explicit_size, filepos);
	149	}
	150	t->explicit_texts[t->nexplicit] = text;
	151	t->explicit_fpos[t->nexplicit] = *fpos;
	152	t->nexplicit++;
	153	}
	154	}
	155	}
	156	}
	157
	158	/*
	159	* Build the final-form index. We now have every tag, with every
	160	* \IM, set up in a 2-3 tree indexed by tag. We now want to collate
	161	* the RHSes of the \IMs, and sort by final form, and decorate the
	162	* entries in the original 2-3 tree with pointers to the RHS
	163	* entries.
	164	*/
	165	void build_index(indexdata *i) {
	166	indextag *t;
	167	word **ta;
	168	filepos *fa;
	169	int ti;
	170	int j;
	171
	172	for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
	173	if (t->implicit_text) {
	174	t->nrefs = 1;
	175	ta = &t->implicit_text;
	176	fa = &t->implicit_fpos;
	177	} else {
	178	t->nrefs = t->nexplicit;
	179	ta = t->explicit_texts;
	180	fa = t->explicit_fpos;
	181	}
	182	if (t->nrefs) {
	183	t->refs = snewn(t->nrefs, indexentry *);
	184	for (j = 0; j < t->nrefs; j++) {
	185	indexentry *ent = snew(indexentry);
	186	ent->text = *ta++;
	187	ent->fpos = *fa++;
	188	t->refs[j] = add234(i->entries, ent);
	189	if (t->refs[j] != ent) /* duplicate */
	190	sfree(ent);
	191	}
	192	}
	193	}
	194	}
	195
	196	void cleanup_index(indexdata *i) {
	197	indextag *t;
	198	indexentry *ent;
	199	int ti;
	200
	201	for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
	202	sfree(t->name);
	203	free_word_list(t->implicit_text);
	204	sfree(t->explicit_texts);
	205	sfree(t->refs);
	206	sfree(t);
	207	}
	208	freetree234(i->tags);
	209	for (ti = 0; (ent = (indexentry *)index234(i->entries, ti))!=NULL; ti++) {
	210	sfree(ent);
	211	}
	212	freetree234(i->entries);
	213	sfree(i);
	214	}
	215
	216	static void dbg_prtwordlist(int level, word *w);
	217	static void dbg_prtmerge(int is_explicit, wchar_t tag, word text);
	218
	219	void index_debug(indexdata *i) {
	220	indextag *t;
	221	indexentry *y;
	222	int ti;
	223	int j;
	224
	225	printf("\nINDEX TAGS\n==========\n\n");
	226	for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
	227	printf("\n");
	228	if (t->implicit_text)
	229	dbg_prtmerge(0, t->name, t->implicit_text);
	230	for (j = 0; j < t->nexplicit; j++)
	231	dbg_prtmerge(1, t->name, t->explicit_texts[j]);
	232	}
	233
	234	printf("\nINDEX ENTRIES\n=============\n\n");
	235	for (ti = 0; (y = (indexentry *)index234(i->entries, ti)) != NULL; ti++) {
	236	printf("\n");
	237	printf("{\n");
	238	dbg_prtwordlist(1, y->text);
	239	printf("}\n");
	240	}
	241	}
	242
	243	static void dbg_prtmerge(int is_explicit, wchar_t tag, word text) {
	244	printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
	245	for (; *tag; tag++)
	246	putchar(*tag);
	247	printf("\" {\n");
	248	dbg_prtwordlist(1, text);
	249	printf("}\n");
	250	}
	251
	252	static void dbg_prtwordlist(int level, word *w) {
	253	for (; w; w = w->next) {
	254	wchar_t *wp;
	255	printf("%sword %d ", level4, "", w->type);
	256	if (w->text) {
	257	printf("\"");
	258	for (wp = w->text; *wp; wp++)
	259	putchar(*wp);
	260	printf("\"");
	261	} else
	262	printf("(no text)");
	263	if (w->alt) {
	264	printf(" alt = {\n");
	265	dbg_prtwordlist(level+1, w->alt);
	266	printf("%s}", level4, "");
	267	}
	268	printf("\n");
	269	}
	270	}