/* -*-c-*-
*
- * $Id: sym.c,v 1.4 1999/05/06 19:51:35 mdw Exp $
+ * $Id: sym.c,v 1.8 1999/08/02 14:45:48 mdw Exp $
*
* Symbol table management
*
/*----- Revision history --------------------------------------------------*
*
* $Log: sym.c,v $
+ * Revision 1.8 1999/08/02 14:45:48 mdw
+ * Break low-level hashtable code out from sym.
+ *
+ * Revision 1.7 1999/06/01 09:49:08 mdw
+ * Allow things to be looked up by just their caller-supplied hashes. This
+ * actually needs to be thought through better.
+ *
+ * Revision 1.6 1999/05/26 21:08:31 mdw
+ * Rename symbols in line with newer conventions.
+ *
+ * Revision 1.5 1999/05/13 22:48:37 mdw
+ * Twiddle the extension threshold. Change `-ise' to `-ize' throughout.
+ *
* Revision 1.4 1999/05/06 19:51:35 mdw
* Reformatted the LGPL notice a little bit.
*
/* --- Local headers --- */
#include "alloc.h"
+#include "bits.h"
#include "crc32.h"
#include "exc.h"
+#include "hash.h"
#include "sub.h"
#include "sym.h"
#include "track.h"
* so that it can be used to mask of the bottom bits of a hash value.
*/
-#define SYM_INITSZ 255 /* Size of a new hash table */
+#define SYM_INITSZ 64 /* Size of a new hash table */
/* --- Maximum load factor --- *
*
* and the limit %$l$% satisfy the relation %$n < bl$%; if a new item is
* added to the table and this relation is found to be false, the table is
* doubled in size.
- *
- * The current function gives %$l = {3n \over 4}$%, which appears to be
- * reasonable on the face of things.
*/
-#define SYM_LIMIT(n) (((n) * 3) >> 2) /* Load factor for growing table */
+#define SYM_LIMIT(n) ((n) * 2) /* Load factor for growing table */
/*----- Main code ---------------------------------------------------------*/
-/* --- @sym_createTable@ --- *
+/* --- @sym_create@ --- *
*
- * Arguments: @sym_table *t@ = symbol table to initialise
+ * Arguments: @sym_table *t@ = symbol table to initialize
*
* Returns: ---
*
- * Use: Initialises the given symbol table. Raises @EXC_NOMEM@ if
+ * Use: Initializes the given symbol table. Raises @EXC_NOMEM@ if
* there isn't enough memory.
*/
-void sym_createTable(sym_table *t)
+void sym_create(sym_table *t)
{
- size_t i;
-
TRACK_CTX("symbol table creation");
TRACK_PUSH;
-
- t->mask = SYM_INITSZ;
- t->c = SYM_LIMIT(SYM_INITSZ);
- t->a = xmalloc((t->mask + 1) * sizeof(sym_base *));
-
- for (i = 0; i < SYM_INITSZ + 1; i++)
- t->a[i] = 0;
-
+ hash_create(&t->t, SYM_INITSZ);
+ t->load = SYM_LIMIT(SYM_INITSZ);
TRACK_POP;
}
-/* --- @sym_destroyTable@ --- *
+/* --- @sym_destroy@ --- *
*
* Arguments: @sym_table *t@ = pointer to symbol table in question
*
* occupy.
*/
-void sym_destroyTable(sym_table *t)
+void sym_destroy(sym_table *t)
{
- size_t i;
- sym_base *p, *q;
+ sym_iter i;
- TRACK_CTX("symbol table deletion");
+ TRACK_CTX("symbol table destruction");
TRACK_PUSH;
- for (i = 0; i <= t->mask; i++) {
- p = t->a[i];
- while (p) {
- q = p->next;
- if (p->len > SYM_BUFSZ)
- sub_free(p->name.p, p->len);
- free(p);
- p = q;
- }
+ SYM_MKITER(&i, t);
+ for (;;) {
+ sym_base *p;
+ SYM_NEXT(&i, p);
+ if (!p)
+ break;
+ if (p->len > SYM_BUFSZ)
+ sub_free(p->name.p, p->len);
+ free(p);
}
- free(t->a);
+ hash_destroy(&t->t);
TRACK_POP;
}
void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
{
- unsigned long hash; /* Hash value for user's name */
- size_t len = l < 0 ? strlen(n) + 1 : l; /* Find length of user's name */
- sym_base *bin; /* Bin containing our item */
- sym_base *p, *q; /* Pointer wandering through list */
+ uint32 hash;
+ size_t len = 0;
+ hash_base **bin, **p;
+ sym_base *q;
/* --- Find the correct bin --- */
- CRC32(hash, 0, n, len); /* Find hash value for this name */
- bin = p = (sym_base *)(t->a + (hash & t->mask));
+ len = l < 0 ? strlen(n) + 1 : l;
+ CRC32(hash, 0, n, len);
+ bin = HASH_BIN(&t->t, hash);
/* --- Search the bin list --- */
- while (p->next) {
- if (hash == p->next->hash && /* Check full hash values first */
- len == p->next->len && /* Then compare string lengths */
- !memcmp(n, SYM_NAME(p->next), len)) /* And finally compare names */
- {
+ for (p = bin; *p; p = &(*p)->next) {
+ q = (sym_base *)*p;
+ if (hash == q->b.hash && len == q->len && !memcmp(n, SYM_NAME(q), len)) {
+
/* --- Found a match --- *
*
* As a minor, and probably pointless, tweak, move the item to the
* front of its bin list.
*/
- q = p->next; /* Find the actual symbol block */
- p->next = q->next; /* Extract block from bin list */
- q->next = bin->next; /* Set up symbol's next pointer */
- bin->next = q; /* And reinsert the block */
+ (*p) = q->b.next;
+ q->b.next = *bin;
+ *bin = &q->b;
/* --- Return the block --- */
- if (f) *f = 1; /* Didn't fail to find the item */
- return (q); /* And return the block */
+ if (f) *f = 1;
+ return (q);
}
-
- p = p->next; /* Move onto the next item */
}
/* --- Couldn't find the item there --- */
- if (f) *f = 0; /* Failed to find the block */
- if (!sz) return (0); /* Return zero if not creating */
+ if (f) *f = 0;
+ if (!sz) return (0);
- /* --- Create a new symbol block and initialise it --- */
+ /* --- Create a new symbol block and initialize it --- */
{
TRACK_CTX("new symbol creation");
TRACK_PUSH;
- p = xmalloc(sz); /* Create a new symbol block */
- p->next = bin->next; /* Set up the next pointer */
- p->hash = hash; /* Set up the hash value too */
- p->len = len; /* And set up the string length */
- if (len <= SYM_BUFSZ)
- memcpy(p->name.b, n, len); /* And copy the string over */
- else {
- TRY {
- p->name.p = sub_alloc(len); /* Allocate a block for the name */
- memcpy(p->name.p, n, len); /* And copy the string over */
- } CATCH {
- free(p);
- TRACK_POP;
- RETHROW;
- } END_TRY;
+ q = xmalloc(sz);
+ q->b.next = *bin;
+ q->b.hash = hash;
+ q->len = len;
+ if (n) {
+ if (len <= SYM_BUFSZ)
+ memcpy(q->name.b, n, len);
+ else {
+ TRY {
+ q->name.p = sub_alloc(len);
+ memcpy(q->name.p, n, len);
+ } CATCH {
+ free(q);
+ TRACK_POP;
+ RETHROW;
+ } END_TRY;
+ }
}
TRACK_POP;
}
- bin->next = p; /* Put the pointer into the bin */
+ *bin = &q->b;
/* --- Consider growing the array --- */
- if (t->c)
- t->c--;
- if (!t->c) {
- unsigned long m = t->mask + 1; /* Next set bit in has word */
- sym_base *p, *q, *r; /* More useful pointers */
- size_t i, lim; /* Loop counter and limit */
-
- TRACK_CTX("symbol table extension");
- TRACK_PUSH;
-
- /* --- Update values in the anchor block --- */
-
- TRY {
- t->a = xrealloc(t->a, (t->mask + 1) * 2 * sizeof(sym_base *));
- } CATCH switch (exc_type) {
- case EXC_NOMEM:
- TRACK_POP;
- return (bin->next);
- default:
- TRACK_POP;
- RETHROW;
- } END_TRY;
-
- t->c = SYM_LIMIT(t->mask + 1); /* Set load value */
- t->mask = (t->mask + 1) * 2 - 1; /* Set the new mask value */
-
- /* --- Now wander through the table rehashing things --- *
- *
- * This loop is very careful to avoid problems with aliasing. The items
- * are dealt with from the end backwards to avoid overwriting bins before
- * they've been processed.
- */
-
- lim = (t->mask + 1) >> 1;
- for (i = 0; i < lim; i++) {
-
- /* --- Some initialisation --- */
-
- r = t->a[i]; /* Find the list we're dissecting */
- p = (sym_base *)(t->a + i); /* Find bit-clear list */
- q = (sym_base *)(t->a + i + lim); /* And the bit-set lsit */
-
- /* --- Now go through the @r@ list --- */
-
- while (r) {
- if (r->hash & m) /* Is the next bit set? */
- q = q->next = r; /* Yes, so fit up previous link */
- else
- p = p->next = r; /* No, so fit up previous link */
- r = r->next; /* Move onto the next item */
- }
- p->next = q->next = 0; /* Null terminate the new lists */
- }
-
- TRACK_POP;
- }
+ if (t->load)
+ t->load--;
+ if (!t->load && hash_extend(&t->t))
+ t->load = SYM_LIMIT(t->t.mask / 2 + 1);
/* --- Finished that, so return the new symbol block --- */
- return (p);
+ return (q);
}
/* --- @sym_remove@ --- *
*
* Arguments: @sym_table *i@ = pointer to a symbol table object
- * @void *b@ = pointer to symbol table entry
+ * @void *p@ = pointer to symbol table entry
*
* Returns: ---
*
* to the entry should already be gone by this point.
*/
-void sym_remove(sym_table *t, void *b)
+void sym_remove(sym_table *t, void *p)
{
- /* --- A quick comment --- *
- *
- * Since the @sym_base@ block contains the hash, finding the element in the
- * bin list is really quick -- it's not worth bothering with things like
- * doubly linked lists.
- */
-
- sym_base *p = b;
- sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask));
-
- /* --- Find the item in the bin list --- */
-
- while (bin->next) {
- if (bin->next == p)
- break;
- bin = bin->next;
- }
- if (!bin->next)
- return;
-
- /* --- Now just remove the item from the list and free it --- *
- *
- * Oh, and bump the load counter.
- */
-
- bin->next = p->next;
- if (p->len > SYM_BUFSZ)
- sub_free(p->name.p, p->len);
- free(p);
- t->c++;
+ sym_base *q = p;
+ hash_remove(&t->t, &q->b);
+ if (q->len > SYM_BUFSZ)
+ sub_free(q->name.p, q->len);
+ free(q);
+ t->load++;
}
-/* --- @sym_createIter@ --- *
+/* --- @sym_mkiter@ --- *
*
* Arguments: @sym_iter *i@ = pointer to an iterator object
* @sym_table *t@ = pointer to a symbol table object
* iterate through a symbol table.
*/
-void sym_createIter(sym_iter *i, sym_table *t)
-{
- i->t = t;
- i->i = 0;
- i->n = 0;
-}
+void sym_mkiter(sym_iter *i, sym_table *t) { SYM_MKITER(i, t); }
/* --- @sym_next@ --- *
*
void *sym_next(sym_iter *i)
{
- sym_base *p;
-
- /* --- Find the next item --- */
-
- while (!i->n) {
- if (i->i > i->t->mask)
- return (0);
- i->n = i->t->a[i->i++];
- }
-
- /* --- Update the iterator block --- */
-
- p = i->n;
- i->n = p->next;
-
- /* --- Done --- */
-
+ void *p;
+ SYM_NEXT(i, p);
return (p);
}
sym_table tbl;
int entries;
- /* --- Initialise for reading the file --- */
+ /* --- Initialize for reading the file --- */
sz = BUFSIZ;
buff = xmalloc(sz + 1);
flag[i] = 0;
entries = 0;
- sym_createTable(&tbl);
+ sym_create(&tbl);
for (;;) {
i = (unsigned)rand() % sz;
case 0: {
sym_word *w;
- printf("find `%s'\n", line[i]);
- if ((rand() & 1023) == 0) {
- putchar('.'); fflush(stdout);
- }
+ printf("? %s\n", line[i]);
w = sym_find(&tbl, line[i], -1, 0, 0);
if (w != flag[i])
unsigned f;
sym_word *w;
- printf("create `%s'\n", line[i]);
- if ((rand() & 1023) == 0) {
- putchar('+'); fflush(stdout);
- }
+ printf("+ %s\n", line[i]);
w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f);
if (f)
v = (rand() % entries) == 0;
if (!v)
break;
- printf("\niterated %i entries\n", entries);
- break;
- printf("iterate\n");
+ printf(".\n");
ntbl = xmalloc(sz * sizeof(sym_word *));
memcpy(ntbl, flag, sz * sizeof(sym_word *));
- sym_createIter(&it, &tbl);
+ sym_mkiter(&it, &tbl);
while ((w = sym_next(&it)) != 0) {
if (ntbl[w->i] == 0)
- printf("*** error: iterate returned duff item %i\n", w->i);
- else
+ printf("*** error: iterate returned duff item %s\n", SYM_NAME(w));
+ else {
+ printf(": %s\n", SYM_NAME(w));
ntbl[w->i] = 0;
+ }
}
for (i = 0; i < sz; i++)
- if (ntbl[i]) printf("*** error: iterate didn't return item %i\n",
- i);
+ if (ntbl[i]) printf("*** error: iterate didn't return item %s\n",
+ SYM_NAME(ntbl[i]));
free(ntbl);
} break;
printf("dump\n");
- for (i = 0; i <= tbl.mask; i++) {
- if (!tbl.a[i]) continue;
+ for (i = 0; i <= tbl.b.mask; i++) {
+ if (!tbl.b.v[i]) continue;
if (v) printf(" %i: ", i);
- b = tbl.a[i];
+ b = (sym_base *)tbl.b.v[i];
while (b) {
- if ((b->hash & tbl.mask) != i)
+ if ((b->b.hash & tbl.b.mask) != i)
printf("*** error: bad hash value found");
if (v) printf("`%s'(%08lx:%lu) ",
line[((sym_word *)b)->i],
- b->hash,
- b->hash & tbl.mask);
- b = b->next;
+ b->b.hash,
+ b->b.hash & tbl.b.mask);
+ b = (sym_base *)b->b.next;
}
if (v) putchar('\n');
}
case 4: {
if (flag[i]) {
- printf("remove `%s'\n", SYM_NAME(&flag[i]->base));
+ printf("- %s\n", SYM_NAME(&flag[i]->base));
if ((rand() & 1023) == 0) {
putchar('-'); fflush(stdout);
}