--- /dev/null
+/* -*-c-*-
+ *
+ * $Id: hash.c,v 1.1 1999/08/02 14:45:48 mdw Exp $
+ *
+ * General hashtable infrastructure
+ *
+ * (c) 1999 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Revision history --------------------------------------------------*
+ *
+ * $Log: hash.c,v $
+ * Revision 1.1 1999/08/02 14:45:48 mdw
+ * Break low-level hashtable code out from sym.
+ *
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc.h"
+#include "bits.h"
+#include "exc.h"
+#include "hash.h"
+#include "track.h"
+
+/*----- Main code ---------------------------------------------------------*/
+
+/* --- @hash_create@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable to initialize
+ * @size_t n@ = number of bins to allocate (zero initially)
+ *
+ * Returns: ---
+ *
+ * Use: Initializes a hashtable with a given number of bins. The
+ * bins are initially empty. The number of bins must be a power
+ * of two. This is not checked.
+ */
+
+void hash_create(hash_table *t, size_t n)
+{
+ hash_base **v;
+ TRACK_CTX("hashtable creation");
+ TRACK_PUSH;
+ t->v = xmalloc(n * sizeof(hash_base *));
+ t->mask = n - 1;
+ for (v = t->v; n; v++, n--)
+ *v = 0;
+ TRACK_POP;
+}
+
+/* --- @hash_destroy@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable to destroy
+ *
+ * Returns: ---
+ *
+ * Use: Frees a hashtable. The items are not freed: they are the
+ * responsibility of the implementation.
+ */
+
+void hash_destroy(hash_table *t)
+{
+ TRACK_CTX("hashtable destruction");
+ TRACK_PUSH;
+ free(t->v);
+ TRACK_POP;
+}
+
+/* --- @hash_bin@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable
+ * @uint32 hash@ = hash value to look up
+ *
+ * Returns: Pointer to the bin's list head.
+ *
+ * Use: Given a hash value return the address of the appropriate list
+ * head. It is safe to remove the current entry from the table.
+ */
+
+hash_base **hash_bin(hash_table *t, uint32 hash)
+{
+ return (HASH_BIN(t, hash));
+}
+
+/* --- @hash_extend@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable to extend
+ *
+ * Returns: Nonzero if extension was successful.
+ *
+ * Use: Extends a hashtable. The number of bins is doubled and the
+ * entries are redistributed.
+ */
+
+int hash_extend(hash_table *t)
+{
+ hash_base **v;
+ uint32 m = t->mask + 1;
+ size_t i;
+
+ /* --- Push in a tracking context --- */
+
+ TRACK_CTX("hashtable extension");
+ TRACK_PUSH;
+
+ /* --- Allocate a new hash bin vector --- */
+
+ if ((v = realloc(t->v, m * 2 * sizeof(hash_base *))) == 0) {
+ TRACK_POP;
+ return (0);
+ }
+ t->v = v;
+ t->mask = (m * 2) - 1;
+
+ /* --- Wander through the table rehashing things --- */
+
+ for (i = 0; i < m; i++) {
+ hash_base **p = v + i;
+ hash_base **q = p + m;
+
+ while (*p) {
+ if (!((*p)->hash & m))
+ p = &(*p)->next;
+ else {
+ *q = *p;
+ q = &(*q)->next;
+ *p = (*p)->next;
+ }
+ }
+ *p = 0;
+ *q = 0;
+ }
+
+ TRACK_POP;
+ return (1);
+}
+
+/* --- @hash_remove@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable
+ * @hash_base *p@ = pointer to item to remove
+ *
+ * Returns: ---
+ *
+ * Use: Removes an item from a hashtable. The item itself is not
+ * freed, although it is removed from the data structure and is
+ * safe to free.
+ */
+
+void hash_remove(hash_table *t, hash_base *p)
+{
+ hash_base **b = HASH_BIN(t, p->hash);
+ while (*b) {
+ if (*b == p) {
+ *b = p->next;
+ return;
+ }
+ b = &(*b)->next;
+ }
+ return;
+}
+
+/* --- @hash_mkiter@ --- *
+ *
+ * Arguments: @hash_iter *i@ = pointer to iterator to create
+ * @hash_table *t@ = pointer to hashtable to iterate
+ *
+ * Returns: ---
+ *
+ * Use: Initializes a hashtable iterator.
+ */
+
+void hash_mkiter(hash_iter *i, hash_table *t) { HASH_MKITER(i, t); }
+
+/* --- @hash_next@ --- *
+ *
+ * Arguments: @hash_iter *i@ = pointer to iterator
+ *
+ * Returns: Pointer to the next hashtable entry found, or null.
+ *
+ * Use: Returns the next entry from the hashtable.
+ */
+
+hash_base *hash_next(hash_iter *i)
+{
+ hash_base *b;
+ HASH_NEXT(i, b);
+ return (b);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
--- /dev/null
+/* -*-c-*-
+ *
+ * $Id: hash.h,v 1.1 1999/08/02 14:45:48 mdw Exp $
+ *
+ * General hashtable infrastructure
+ *
+ * (c) 1999 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Revision history --------------------------------------------------*
+ *
+ * $Log: hash.h,v $
+ * Revision 1.1 1999/08/02 14:45:48 mdw
+ * Break low-level hashtable code out from sym.
+ *
+ */
+
+#ifndef HASH_H
+#define HASH_H
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/*----- Notes -------------------------------------------------------------*
+ *
+ * This isn't a complete implementation of anything. It's a collection of
+ * useful types and functions which will help when building hashtables of
+ * things. The general-purpose hashtable is provided by the @sym@ functions.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <stddef.h>
+
+#ifndef BITS_H
+# include "bits.h"
+#endif
+
+/*----- Data structures ---------------------------------------------------*/
+
+/* --- Hashtable basics --- *
+ *
+ * This contains the bare minimum to actually get anything useful done. In
+ * particular it doesn't maintain any weighting information: when to extend
+ * the table is left up to the particular implementation.
+ */
+
+typedef struct hash_table {
+ uint32 mask; /* Bit mask of hash bits */
+ struct hash_base **v; /* Vector of hash bins */
+} hash_table;
+
+/* --- A hashtable entry --- *
+ *
+ * This is the bare minimum of what needs to be remembered in each hashtable
+ * entry. Comparison of elements is left to the implementation, so I don't
+ * need to know anything about how to represent hash keys here.
+ */
+
+typedef struct hash_base {
+ struct hash_base *next; /* Next entry in hash bin */
+ uint32 hash; /* Hash value for this entry */
+} hash_base;
+
+/* --- A hashtable iterator --- */
+
+typedef struct hash_iter {
+ hash_table *t; /* Hashtable being iterated */
+ hash_base *p; /* Address of next item to return */
+ size_t i; /* Index of next hash bin to use */
+} hash_iter;
+
+/*----- Functions provided ------------------------------------------------*/
+
+/* --- @hash_create@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable to initialize
+ * @size_t n@ = number of bins to allocate (zero initially)
+ *
+ * Returns: ---
+ *
+ * Use: Initializes a hashtable with a given number of bins. The
+ * bins are initially empty. The number of bins must be a power
+ * of two. This is not checked.
+ */
+
+extern void hash_create(hash_table */*t*/, size_t /*n*/);
+
+/* --- @hash_destroy@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable to destroy
+ *
+ * Returns: ---
+ *
+ * Use: Frees a hashtable. The items are not freed: they are the
+ * responsibility of the implementation.
+ */
+
+extern void hash_destroy(hash_table */*t*/);
+
+/* --- @hash_bin@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable
+ * @uint32 hash@ = hash value to look up
+ *
+ * Returns: Pointer to the bin's list head.
+ *
+ * Use: Given a hash value return the address of the appropriate list
+ * head. It is safe to remove the current entry from the table.
+ */
+
+#define HASH_BIN(t, hash) ((t)->v + ((hash) & (t)->mask))
+
+extern hash_base **hash_bin(hash_table */*t*/, uint32 /*hash*/);
+
+/* --- @hash_extend@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable to extend
+ *
+ * Returns: Nonzero if extension was successful.
+ *
+ * Use: Extends a hashtable. The number of bins is doubled and the
+ * entries are redistributed.
+ */
+
+extern int hash_extend(hash_table */*t*/);
+
+/* --- @hash_remove@ --- *
+ *
+ * Arguments: @hash_table *t@ = pointer to hashtable
+ * @hash_base *p@ = pointer to item to remove
+ *
+ * Returns: ---
+ *
+ * Use: Removes an item from a hashtable. The item itself is not
+ * freed, although it is removed from the data structure and is
+ * safe to free.
+ */
+
+extern void hash_remove(hash_table */*t*/, hash_base */*p*/);
+
+/* --- @hash_mkiter@ --- *
+ *
+ * Arguments: @hash_iter *i@ = pointer to iterator to create
+ * @hash_table *t@ = pointer to hashtable to iterate
+ *
+ * Returns: ---
+ *
+ * Use: Initializes a hashtable iterator.
+ */
+
+#define HASH_MKITER(i_, t_) ((i_)->t = (t_), (i_)->p = 0, (i_)->i = 0)
+
+extern void hash_mkiter(hash_iter */*i*/, hash_table */*t*/);
+
+/* --- @hash_next@ --- *
+ *
+ * Arguments: @hash_iter *i@ = pointer to iterator
+ *
+ * Returns: Pointer to the next hashtable entry found, or null.
+ *
+ * Use: Returns the next entry from the hashtable.
+ */
+
+#define HASH_NEXT(i_, b_) do { \
+ hash_iter *_i = (i_); \
+ hash_base *_p; \
+ hash_table *_t = _i->t; \
+ uint32 _m = _t->mask; \
+ \
+ for (;;) { \
+ if (_i->p) { \
+ _p = _i->p; \
+ _i->p = _p->next; \
+ break; \
+ } else if (_i->i > _m) { \
+ _p = 0; \
+ break; \
+ } else \
+ _i->p = _t->v[_i->i++]; \
+ } \
+ (b_) = _p; \
+} while (0)
+
+extern hash_base *hash_next(hash_iter */*i*/);
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif
--- /dev/null
+.\" -*-nroff-*-
+.de VS
+.sp 1
+.RS
+.nf
+.ft B
+..
+.de VE
+.ft R
+.fi
+.RE
+.sp 1
+..
+.de hP
+.IP
+.ft B
+\h'-\w'\\$1\ 'u'\\$1\ \c
+.ft P
+..
+.ie t .ds o \(bu
+.el .ds o o
+.TH hash 3 "2 August 1999" mLib
+.SH "NAME"
+hash \- low-level hashtable implementation
+.\" @hash_create
+.\" @hash_destroy
+.\" @hash_bin
+.\" @hash_extend
+.\" @hash_remove
+.\" @hash_mkiter
+.\" @hash_next
+.\"
+.\" @HASH_BIN
+.\" @HASH_MKITER
+.\" @HASH_NEXT
+.\"
+.SH "SYNOPSIS"
+.nf
+.B "#include <mLib/hash.h>"
+
+.BI "void hash_create(hash_table *" t ", size_t " n );
+.BI "void hash_destroy(hash_table *" t );
+.BI "hash_base **hash_bin(hash_table *" t ", uint32 " hash );
+.BI "int hash_extend(hash_table *" t );
+.BI "void hash_remove(hash_table *" t ", hash_base * " b );
+.BI "void hash_mkiter(hash_iter *" i ", hash_table *" t );
+.BI "hash_base *hash_next(hash_iter *" i );
+
+.BI "hash_base **HASH_BIN(hash_table *" t ", uint32 " hash );
+.BI "void HASH_MKITER(hash_iter *" i ", hash_table *" t );
+.BI "void HASH_NEXT(hash_iter *" i ", " b );
+.fi
+.SH "OVERVIEW"
+The
+.B hash
+functions provide the basis for an extensible hashtable implementation.
+The implementation is not complete. Many decisions have been left to
+the user, including:
+.hP \*o
+How keys should be represented, hashed and compared.
+.hP \*o
+How objects contained within the table should be allocated.
+.hP \*o
+When the hashtable should be extended.
+.PP
+A complete hashtable implementation will need to take the above
+decisions. If you just want a prepackaged solution, see
+.BR sym (3)
+which provides one.
+.SH "IMPLEMENTATION DETAILS"
+Each item in the hashtable is assigned a 32-bit integer
+.IR hash :
+a number computed somehow from the item's data such that two items which
+are considered equal will yield the same hash. Ideally, different items
+will yield different hashes. It is important for this implementation
+that all bits of the hash are similarly good.
+.PP
+In order to look up an item, the high bits of the hash are masked off
+and the remainder used as an index into a vector of
+.IR "bin lists" .
+Each bin contains a list of items with identical low-order bits of their
+hashes.
+.PP
+A table expansion involves doubling the size of the bin vector. Each
+bin list is then split into two, items being placed into a new bin
+depending on the setting of the next lowest hash bit. By expanding the
+hashtable as needed, lookups remain constant-time.
+.PP
+A low-level hashtable is represented by a
+.B hash_table
+structure. It contains two members:
+.TP
+.B mask
+The current bitmask to be applied to hashes. This is one less than the
+current number of bins in the hashtable, and is applied to hash values
+in order to decide which bin an item should be in.
+.TP
+.B v
+The bin vector. It is an array of pointers to hashtable items.
+.PP
+A hashtable item consists of a
+.B hash_base
+structure. A full hashtable implementation will need to extend this
+structure by adding keying information and other data; the
+.B hash_base
+only contains the bare minimum of information needed to maintain the
+hashtable at a low level. It contains the following members:
+.TP
+.B next
+Pointer to the next item in the bin list. The final item has a null
+.B next
+pointer. The entry in the bin vector is null if the bin list is empty.
+It is up to the high-level implementation to insert items into the list.
+.TP
+.B hash
+The hash for this item. This must be the full 32-bit hash for the
+current item. It is used during hashtable expansion to determine which
+bin an item should be moved to.
+.SH "FUNCTIONALITY PROVIDED"
+This section describes the functions and macros provided for building
+hashtables. Code examples are given throughout. They assume the
+following definitions:
+.VS
+/* --- A table of items --- */
+
+typedef struct item_table {
+ hash_table t;
+ size_t load;
+};
+
+/* --- An item --- */
+
+typedef struct item {
+ hash_base b;
+ const char *k;
+ /* ... */
+} item;
+.VE
+The implementation presented here is simple but relatively bad. The
+source file
+.B sym.c
+presents a more realistic example, but is rather more complex.
+.SS "Initialization and finalization"
+An empty hashtable is initialized by calling
+.B hash_create
+with the address of a
+.B hash_table
+structure to be filled in and the initial number of hash bins to create.
+.PP
+For example, an item table might be initialized like this:
+.VS
+void item_createtab(item_table *t)
+{
+ hash_create(&t->t, ITEM_INITSZ);
+ t->load = ITEM_INITLOAD;
+}
+.VE
+A hashtable can be destroyed by calling
+.B hash_destroy
+with the address of the
+.B hash_table
+structure. This does not attempt to deallocate the individual items;
+that must be done beforehand.
+.PP
+The usual way to deallocate the individual hashtable items is using the
+iteration constructs described below.
+.VS
+void item_destroytab(item_table *t)
+{
+ hash_iter i;
+ hash_base *b;
+ for (hash_mkiter(&i, &t->t); (b = hash_next(&i)) != 0; ) {
+ item *ii = (item *)b;
+ free(ii->k);
+ /* ... */
+ DESTROY(ii);
+ }
+ hash_destroy(&t->t);
+}
+.VE
+.sp -1
+.SS "Searching, adding and removing"
+Items must be searched for and added by hand.
+.PP
+The macro
+.B HASH_BIN
+returns the address of the bin list haed for a particular hashtable and
+hash value. The function
+.B hash_bin
+works the same way and provides the same result, but since the macro is
+very simple its use is preferred. However, it will evaluate its
+arguments multiple times.
+.PP
+Once the bin list has been found, it's fairly easy to search for an
+exact match. A simple search might look something like this:
+.VS
+item *lookup(item_table *t, const char *k)
+{
+ uint32 h = hash(k); /* Hash @k@ somehow */
+ hash_base **bin = HASH_BIN(&t->t, h);
+ hash_base *b;
+ for (b = *bin; b; b = b->next) {
+ item *i = (item *)b;
+ if (h == i->b.hash && strcmp(k, i->k) == 0)
+ return (i);
+ }
+ return (0);
+}
+.VE
+Insertion is also relatively trivial given the bin list head. Insertion
+may make the hashtable too large, so it might be necessary to extend
+it. Extension is performed by
+.BR hash_extend ,
+which is passed only the address of the hashtable. It returns nonzero
+if extension was successful.
+.VS
+item *add(item_table *t, const char *k, /* ... */)
+{
+ item *i;
+ uint32 h;
+ hash_base **bin;
+
+ /* --- See if the item is already there --- */
+
+ if ((i = = lookup(t, k)) != 0)
+ return (i);
+
+ /* --- Make a new hashtable item --- */
+
+ i = CREATE(item);
+ i->k = xstrdup(k);
+ /* ... */
+
+ /* --- Link it into the bin list --- */
+
+ h = i->b.hash = hash(k);
+ bin = HASH_BIN(&t->t, h);
+ i->b.next = *bin;
+ *bin = &i->b.next;
+
+ /* --- Maybe extend the hashtable --- */
+
+ if (t->load)
+ t->load--;
+ else if (hash_extend(&t->t))
+ t->load = recalc_load(t);
+
+ /* --- Done --- */
+
+ return (i);
+}
+.VE
+The
+.B sym
+implementation is rather more sophisticated in its approach but the idea
+is the same. In particular,
+.B sym
+provides a single interface for lookup and insertion which prevents the
+unnecessary rehashing performed by the above code.
+.PP
+Removal of items is more straightforward. The function
+.B hash_remove
+will unlink a given item from its bin list, after which point it is safe
+to remove.
+.SS "Iteration"
+Iteration allows code to be performed on all the items in a hashtable.
+This is done using an
+.I iterator
+object, represented by a
+.B hash_iter
+structure. An iterator is initialized by calling
+.BR hash_mkiter .
+Each call to
+.B hash_next
+yields a different item from the hashtable until there are none left, a
+condition signified by a null return value.
+.PP
+The macros
+.B HASH_MKITER
+and
+.B HASH_NEXT
+do the same jobs as the above functions. However,
+.B HASH_NEXT
+has a slightly bizarre argument passing convention: its second argument
+is an
+.I lvalue
+which is updated to contain the address of the next item.
+.PP
+The finalization code above contained an example of iteration.
+.SH "SEE ALSO"
+.BR sym (3),
+.BR mLib (3).
+.SH "AUTHOR"
+Mark Wooding, <mdw@nsict.org>
/* -*-c-*-
*
- * $Id: sym.c,v 1.7 1999/06/01 09:49:08 mdw Exp $
+ * $Id: sym.c,v 1.8 1999/08/02 14:45:48 mdw Exp $
*
* Symbol table management
*
/*----- Revision history --------------------------------------------------*
*
* $Log: sym.c,v $
+ * Revision 1.8 1999/08/02 14:45:48 mdw
+ * Break low-level hashtable code out from sym.
+ *
* Revision 1.7 1999/06/01 09:49:08 mdw
* Allow things to be looked up by just their caller-supplied hashes. This
* actually needs to be thought through better.
#include "bits.h"
#include "crc32.h"
#include "exc.h"
+#include "hash.h"
#include "sub.h"
#include "sym.h"
#include "track.h"
* so that it can be used to mask of the bottom bits of a hash value.
*/
-#define SYM_INITSZ 255 /* Size of a new hash table */
+#define SYM_INITSZ 64 /* Size of a new hash table */
/* --- Maximum load factor --- *
*
* doubled in size.
*/
-#define SYM_LIMIT(n) ((n) * 4) /* Load factor for growing table */
+#define SYM_LIMIT(n) ((n) * 2) /* Load factor for growing table */
/*----- Main code ---------------------------------------------------------*/
void sym_create(sym_table *t)
{
- size_t i;
-
TRACK_CTX("symbol table creation");
TRACK_PUSH;
-
- t->mask = SYM_INITSZ;
- t->c = SYM_LIMIT(SYM_INITSZ);
- t->a = xmalloc((t->mask + 1) * sizeof(sym_base *));
-
- for (i = 0; i < SYM_INITSZ + 1; i++)
- t->a[i] = 0;
-
+ hash_create(&t->t, SYM_INITSZ);
+ t->load = SYM_LIMIT(SYM_INITSZ);
TRACK_POP;
}
void sym_destroy(sym_table *t)
{
- size_t i;
- sym_base *p, *q;
+ sym_iter i;
- TRACK_CTX("symbol table deletion");
+ TRACK_CTX("symbol table destruction");
TRACK_PUSH;
- for (i = 0; i <= t->mask; i++) {
- p = t->a[i];
- while (p) {
- q = p->next;
- if (p->len > SYM_BUFSZ)
- sub_free(p->name.p, p->len);
- free(p);
- p = q;
- }
+ SYM_MKITER(&i, t);
+ for (;;) {
+ sym_base *p;
+ SYM_NEXT(&i, p);
+ if (!p)
+ break;
+ if (p->len > SYM_BUFSZ)
+ sub_free(p->name.p, p->len);
+ free(p);
}
- free(t->a);
+ hash_destroy(&t->t);
TRACK_POP;
}
* may be given, in which case the name may contain arbitrary
* binary data, or it may be given as a negative number, in
* which case the length of the name is calculated as
- * @strlen(n) + 1@. The name pointer @n@ may also be zero; in
- * this case, @l@ is taken to be a raw hash, and any element
- * with a matching hash is taken to be the one wanted.
+ * @strlen(n) + 1@.
*
* The return value is the address of a pointer to a @sym_base@
* block (which may have other things on the end, as above). If
{
uint32 hash;
size_t len = 0;
- sym_base *bin;
- sym_base *p, *q;
+ hash_base **bin, **p;
+ sym_base *q;
/* --- Find the correct bin --- */
- if (n) {
- len = l < 0 ? strlen(n) + 1 : l;
- CRC32(hash, 0, n, len);
- } else
- hash = (uint32)l;
- bin = p = (sym_base *)(t->a + (hash & t->mask));
+ len = l < 0 ? strlen(n) + 1 : l;
+ CRC32(hash, 0, n, len);
+ bin = HASH_BIN(&t->t, hash);
/* --- Search the bin list --- */
- while (p->next) {
- if (hash == p->next->hash &&
- (n == 0 || (len == p->next->len &&
- !memcmp(n, SYM_NAME(p->next), len))))
- {
+ for (p = bin; *p; p = &(*p)->next) {
+ q = (sym_base *)*p;
+ if (hash == q->b.hash && len == q->len && !memcmp(n, SYM_NAME(q), len)) {
+
/* --- Found a match --- *
*
* As a minor, and probably pointless, tweak, move the item to the
* front of its bin list.
*/
- q = p->next;
- p->next = q->next;
- q->next = bin->next;
- bin->next = q;
+ (*p) = q->b.next;
+ q->b.next = *bin;
+ *bin = &q->b;
/* --- Return the block --- */
if (f) *f = 1;
return (q);
}
-
- p = p->next;
}
/* --- Couldn't find the item there --- */
TRACK_CTX("new symbol creation");
TRACK_PUSH;
- p = xmalloc(sz);
- p->next = bin->next;
- p->hash = hash;
- p->len = len;
+ q = xmalloc(sz);
+ q->b.next = *bin;
+ q->b.hash = hash;
+ q->len = len;
if (n) {
if (len <= SYM_BUFSZ)
- memcpy(p->name.b, n, len);
+ memcpy(q->name.b, n, len);
else {
TRY {
- p->name.p = sub_alloc(len);
- memcpy(p->name.p, n, len);
+ q->name.p = sub_alloc(len);
+ memcpy(q->name.p, n, len);
} CATCH {
- free(p);
+ free(q);
TRACK_POP;
RETHROW;
} END_TRY;
TRACK_POP;
}
- bin->next = p;
+ *bin = &q->b;
/* --- Consider growing the array --- */
- if (t->c)
- t->c--;
- if (!t->c) {
- uint32 m = t->mask + 1;
- sym_base *p, *q, *r;
- size_t i, lim;
-
- TRACK_CTX("symbol table extension");
- TRACK_PUSH;
-
- /* --- Update values in the anchor block --- */
-
- TRY {
- t->a = xrealloc(t->a, (t->mask + 1) * 2 * sizeof(sym_base *));
- } CATCH switch (exc_type) {
- case EXC_NOMEM:
- TRACK_POP;
- return (bin->next);
- default:
- TRACK_POP;
- RETHROW;
- } END_TRY;
-
- t->c = SYM_LIMIT(t->mask + 1);
- t->mask = (t->mask + 1) * 2 - 1;
-
- /* --- Now wander through the table rehashing things --- *
- *
- * This loop is very careful to avoid problems with aliasing. The items
- * are dealt with from the end backwards to avoid overwriting bins before
- * they've been processed.
- */
-
- lim = (t->mask + 1) >> 1;
- for (i = 0; i < lim; i++) {
-
- /* --- Some initialization --- */
-
- r = t->a[i];
- p = (sym_base *)(t->a + i);
- q = (sym_base *)(t->a + i + lim);
-
- /* --- Now go through the @r@ list --- */
-
- while (r) {
- if (r->hash & m)
- q = q->next = r;
- else
- p = p->next = r;
- r = r->next;
- }
- p->next = q->next = 0;
- }
-
- TRACK_POP;
- }
+ if (t->load)
+ t->load--;
+ if (!t->load && hash_extend(&t->t))
+ t->load = SYM_LIMIT(t->t.mask / 2 + 1);
/* --- Finished that, so return the new symbol block --- */
- return (p);
+ return (q);
}
/* --- @sym_remove@ --- *
*
* Arguments: @sym_table *i@ = pointer to a symbol table object
- * @void *b@ = pointer to symbol table entry
+ * @void *p@ = pointer to symbol table entry
*
* Returns: ---
*
* to the entry should already be gone by this point.
*/
-void sym_remove(sym_table *t, void *b)
+void sym_remove(sym_table *t, void *p)
{
- /* --- A quick comment --- *
- *
- * Since the @sym_base@ block contains the hash, finding the element in the
- * bin list is really quick -- it's not worth bothering with things like
- * doubly linked lists.
- */
-
- sym_base *p = b;
- sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask));
-
- /* --- Find the item in the bin list --- */
-
- while (bin->next) {
- if (bin->next == p)
- break;
- bin = bin->next;
- }
- if (!bin->next)
- return;
-
- /* --- Now just remove the item from the list and free it --- *
- *
- * Oh, and bump the load counter.
- */
-
- bin->next = p->next;
- if (p->len > SYM_BUFSZ)
- sub_free(p->name.p, p->len);
- free(p);
- t->c++;
+ sym_base *q = p;
+ hash_remove(&t->t, &q->b);
+ if (q->len > SYM_BUFSZ)
+ sub_free(q->name.p, q->len);
+ free(q);
+ t->load++;
}
/* --- @sym_mkiter@ --- *
* iterate through a symbol table.
*/
-void sym_mkiter(sym_iter *i, sym_table *t)
-{
- i->t = t;
- i->i = 0;
- i->n = 0;
-}
+void sym_mkiter(sym_iter *i, sym_table *t) { SYM_MKITER(i, t); }
/* --- @sym_next@ --- *
*
void *sym_next(sym_iter *i)
{
- sym_base *p;
-
- /* --- Find the next item --- */
-
- while (!i->n) {
- if (i->i > i->t->mask)
- return (0);
- i->n = i->t->a[i->i++];
- }
-
- /* --- Update the iterator block --- */
-
- p = i->n;
- i->n = p->next;
-
- /* --- Done --- */
-
+ void *p;
+ SYM_NEXT(i, p);
return (p);
}
case 0: {
sym_word *w;
- printf("find `%s'\n", line[i]);
- if ((rand() & 1023) == 0) {
- putchar('.'); fflush(stdout);
- }
+ printf("? %s\n", line[i]);
w = sym_find(&tbl, line[i], -1, 0, 0);
if (w != flag[i])
unsigned f;
sym_word *w;
- printf("create `%s'\n", line[i]);
- if ((rand() & 1023) == 0) {
- putchar('+'); fflush(stdout);
- }
+ printf("+ %s\n", line[i]);
w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f);
if (f)
v = (rand() % entries) == 0;
if (!v)
break;
- printf("\niterated %i entries\n", entries);
- break;
- printf("iterate\n");
+ printf(".\n");
ntbl = xmalloc(sz * sizeof(sym_word *));
memcpy(ntbl, flag, sz * sizeof(sym_word *));
while ((w = sym_next(&it)) != 0) {
if (ntbl[w->i] == 0)
- printf("*** error: iterate returned duff item %i\n", w->i);
- else
+ printf("*** error: iterate returned duff item %s\n", SYM_NAME(w));
+ else {
+ printf(": %s\n", SYM_NAME(w));
ntbl[w->i] = 0;
+ }
}
for (i = 0; i < sz; i++)
- if (ntbl[i]) printf("*** error: iterate didn't return item %i\n",
- i);
+ if (ntbl[i]) printf("*** error: iterate didn't return item %s\n",
+ SYM_NAME(ntbl[i]));
free(ntbl);
} break;
printf("dump\n");
- for (i = 0; i <= tbl.mask; i++) {
- if (!tbl.a[i]) continue;
+ for (i = 0; i <= tbl.b.mask; i++) {
+ if (!tbl.b.v[i]) continue;
if (v) printf(" %i: ", i);
- b = tbl.a[i];
+ b = (sym_base *)tbl.b.v[i];
while (b) {
- if ((b->hash & tbl.mask) != i)
+ if ((b->b.hash & tbl.b.mask) != i)
printf("*** error: bad hash value found");
if (v) printf("`%s'(%08lx:%lu) ",
line[((sym_word *)b)->i],
- b->hash,
- b->hash & tbl.mask);
- b = b->next;
+ b->b.hash,
+ b->b.hash & tbl.b.mask);
+ b = (sym_base *)b->b.next;
}
if (v) putchar('\n');
}
case 4: {
if (flag[i]) {
- printf("remove `%s'\n", SYM_NAME(&flag[i]->base));
+ printf("- %s\n", SYM_NAME(&flag[i]->base));
if ((rand() & 1023) == 0) {
putchar('-'); fflush(stdout);
}
/* -*-c-*-
*
- * $Id: sym.h,v 1.7 1999/06/01 09:49:33 mdw Exp $
+ * $Id: sym.h,v 1.8 1999/08/02 14:45:48 mdw Exp $
*
* Symbol table management
*
/*----- Revision history --------------------------------------------------*
*
* $Log: sym.h,v $
+ * Revision 1.8 1999/08/02 14:45:48 mdw
+ * Break low-level hashtable code out from sym.
+ *
* Revision 1.7 1999/06/01 09:49:33 mdw
* Allow things to be looked up by just their caller-supplied hashes. This
* actually needs to be thought through better.
# include "bits.h"
#endif
+#ifndef HASH_H
+# include "hash.h"
+#endif
+
/*----- Type definitions --------------------------------------------------*/
/* --- Symbol table --- *
*/
typedef struct sym_table {
- unsigned long mask; /* Bit mask for hashing purposes */
- size_t c; /* Down counter for growing table */
- struct sym_base **a; /* Array of hash bins */
+ hash_table t;
+ size_t load;
} sym_table;
/* --- A symbol table entry --- *
#define SYM_BUFSZ 16 /* Size of local string buffer */
typedef struct sym_base {
- struct sym_base *next; /* Next symbol in hash bin */
- uint32 hash; /* Hash value for symbol's name */
+ hash_base b;
union {
char *p; /* Pointer to name string */
char b[SYM_BUFSZ]; /* Buffer containing a short name */
/* --- An iterator block --- */
-typedef struct sym_iter {
- sym_table *t; /* Symbol table being iterated */
- sym_base *n; /* Address of next item to return */
- size_t i; /* Index of next hash bin to use */
-} sym_iter;
+typedef hash_iter sym_iter;
/*----- External functions ------------------------------------------------*/
* may be given, in which case the name may contain arbitrary
* binary data, or it may be given as a negative number, in
* which case the length of the name is calculated as
- * @strlen(n) + 1@. The name pointer @n@ may also be zero; in
- * this case, @l@ is taken to be a raw hash, and any element
- * with a matching hash is taken to be the one wanted.
+ * @strlen(n) + 1@.
*
* The return value is the address of a pointer to a @sym_base@
* block (which may have other things on the end, as above). If
* iterate through a symbol table.
*/
+#define SYM_MKITER(i, t) HASH_MKITER((i), &(t)->t)
+
extern void sym_mkiter(sym_iter */*i*/, sym_table */*t*/);
/* --- @sym_next@ --- *
* returned in any particular order.
*/
+#define SYM_NEXT(i, p) do { \
+ hash_base *_q; \
+ HASH_NEXT((i), _q); \
+ (p) = (void *)_q; \
+} while (0)
+
extern void *sym_next(sym_iter */*i*/);
/*----- That's all, folks -------------------------------------------------*/