@@@ much mess, mostly manpages
[mLib] / struct / hash.3.in
CommitLineData
03d53b73 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for hash table framework
4.\"
5.\" (c) 1999, 2001, 2003, 2005, 2009, 2023, 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH hash 3mLib "2 August 1999" "Straylight/Edgeware" "mLib utilities library"
03d53b73 32.\" @hash_create
33.\" @hash_destroy
34.\" @hash_bin
35.\" @hash_extend
36.\" @hash_remove
37.\" @hash_mkiter
38.\" @hash_next
c4ccbbf9 39.
03d53b73 40.\" @HASH_BIN
41.\" @HASH_MKITER
42.\" @HASH_NEXT
c4ccbbf9
MW
43.
44.\"--------------------------------------------------------------------------
45.SH "NAME"
46hash \- low-level hashtable implementation
47.
48.\"--------------------------------------------------------------------------
03d53b73 49.SH "SYNOPSIS"
c4ccbbf9 50.
03d53b73 51.nf
52.B "#include <mLib/hash.h>"
d056fbdf 53.PP
adec5584 54.ta 2n
4729aa69 55.B "typedef struct {"
adec5584
MW
56.B " uint32 mask;"
57.B " hash_base **v;"
58.B " arena *a;"
4729aa69 59.B "} hash_table;"
d056fbdf 60.PP
4729aa69 61.B "typedef struct {"
adec5584
MW
62.B " hash_base *next;"
63.B " uint32 hash;"
4729aa69 64.B "} hash_base;"
d056fbdf 65.PP
4729aa69 66.B "typedef struct { ...\& } hash_iter;"
d056fbdf 67.PP
03d53b73 68.BI "void hash_create(hash_table *" t ", size_t " n );
69.BI "void hash_destroy(hash_table *" t );
70.BI "hash_base **hash_bin(hash_table *" t ", uint32 " hash );
71.BI "int hash_extend(hash_table *" t );
db0e70a1 72.BI "void hash_remove(hash_table *" t ", hash_base *" b );
03d53b73 73.BI "void hash_mkiter(hash_iter *" i ", hash_table *" t );
74.BI "hash_base *hash_next(hash_iter *" i );
d056fbdf 75.PP
03d53b73 76.BI "hash_base **HASH_BIN(hash_table *" t ", uint32 " hash );
77.BI "void HASH_MKITER(hash_iter *" i ", hash_table *" t );
78.BI "void HASH_NEXT(hash_iter *" i ", " b );
79.fi
c4ccbbf9
MW
80.
81.\"--------------------------------------------------------------------------
03d53b73 82.SH "OVERVIEW"
c4ccbbf9 83.
03d53b73 84The
85.B hash
86functions provide the basis for an extensible hashtable implementation.
87The implementation is not complete. Many decisions have been left to
88the user, including:
89.hP \*o
528c8b4d 90how keys should be represented, hashed and compared;
03d53b73 91.hP \*o
528c8b4d 92how objects contained within the table should be allocated; and
03d53b73 93.hP \*o
528c8b4d 94when the hashtable should be extended.
03d53b73 95.PP
96A complete hashtable implementation will need to take the above
97decisions. If you just want a prepackaged solution, see
98.BR sym (3)
99which provides one.
c4ccbbf9
MW
100.
101.\"--------------------------------------------------------------------------
03d53b73 102.SH "IMPLEMENTATION DETAILS"
c4ccbbf9 103.
03d53b73 104Each item in the hashtable is assigned a 32-bit integer
105.IR hash :
106a number computed somehow from the item's data such that two items which
107are considered equal will yield the same hash. Ideally, different items
108will yield different hashes. It is important for this implementation
109that all bits of the hash are similarly good.
110.PP
111In order to look up an item, the high bits of the hash are masked off
112and the remainder used as an index into a vector of
113.IR "bin lists" .
114Each bin contains a list of items with identical low-order bits of their
115hashes.
116.PP
117A table expansion involves doubling the size of the bin vector. Each
118bin list is then split into two, items being placed into a new bin
119depending on the setting of the next lowest hash bit. By expanding the
120hashtable as needed, lookups remain constant-time.
121.PP
122A low-level hashtable is represented by a
123.B hash_table
124structure. It contains two members:
125.TP
ff76c38f 126.B "uint32 mask"
03d53b73 127The current bitmask to be applied to hashes. This is one less than the
128current number of bins in the hashtable, and is applied to hash values
129in order to decide which bin an item should be in.
130.TP
ff76c38f 131.B "hash_base **v"
03d53b73 132The bin vector. It is an array of pointers to hashtable items.
133.PP
134A hashtable item consists of a
135.B hash_base
136structure. A full hashtable implementation will need to extend this
137structure by adding keying information and other data; the
138.B hash_base
139only contains the bare minimum of information needed to maintain the
140hashtable at a low level. It contains the following members:
141.TP
ff76c38f 142.B "hash_base *next"
03d53b73 143Pointer to the next item in the bin list. The final item has a null
144.B next
145pointer. The entry in the bin vector is null if the bin list is empty.
146It is up to the high-level implementation to insert items into the list.
147.TP
ff76c38f 148.B "uint32 hash"
03d53b73 149The hash for this item. This must be the full 32-bit hash for the
150current item. It is used during hashtable expansion to determine which
151bin an item should be moved to.
c4ccbbf9
MW
152.
153.\"--------------------------------------------------------------------------
03d53b73 154.SH "FUNCTIONALITY PROVIDED"
c4ccbbf9 155.
03d53b73 156This section describes the functions and macros provided for building
157hashtables. Code examples are given throughout. They assume the
158following definitions:
159.VS
d056fbdf 160.ta 2n
03d53b73 161/* --- A table of items --- */
d056fbdf 162.VP
03d53b73 163typedef struct item_table {
d056fbdf
MW
164 hash_table t;
165 size_t load;
03d53b73 166};
d056fbdf 167.VP
03d53b73 168/* --- An item --- */
d056fbdf 169.VP
03d53b73 170typedef struct item {
d056fbdf
MW
171 hash_base b;
172 const char *k;
173 /* ... */
03d53b73 174} item;
175.VE
176The implementation presented here is simple but relatively bad. The
177source file
178.B sym.c
179presents a more realistic example, but is rather more complex.
c4ccbbf9 180.
03d53b73 181.SS "Initialization and finalization"
182An empty hashtable is initialized by calling
183.B hash_create
184with the address of a
185.B hash_table
186structure to be filled in and the initial number of hash bins to create.
187.PP
188For example, an item table might be initialized like this:
189.VS
d056fbdf 190.ta 2n
03d53b73 191void item_createtab(item_table *t)
192{
d056fbdf
MW
193 hash_create(&t->t, ITEM_INITSZ);
194 t->load = ITEM_INITLOAD;
03d53b73 195}
196.VE
197A hashtable can be destroyed by calling
198.B hash_destroy
199with the address of the
200.B hash_table
201structure. This does not attempt to deallocate the individual items;
202that must be done beforehand.
203.PP
204The usual way to deallocate the individual hashtable items is using the
205iteration constructs described below.
206.VS
d056fbdf 207.ta 2n +2n
03d53b73 208void item_destroytab(item_table *t)
209{
d056fbdf
MW
210 hash_iter i;
211 hash_base *b;
212 for (hash_mkiter(&i, &t->t); (b = hash_next(&i)) != 0; ) {
213 item *ii = (item *)b;
214 free(ii->k);
215 /* ... */
216 DESTROY(ii);
217 }
218 hash_destroy(&t->t);
03d53b73 219}
220.VE
221.sp -1
c4ccbbf9 222.
03d53b73 223.SS "Searching, adding and removing"
224Items must be searched for and added by hand.
225.PP
226The macro
227.B HASH_BIN
228returns the address of the bin list haed for a particular hashtable and
229hash value. The function
230.B hash_bin
231works the same way and provides the same result, but since the macro is
232very simple its use is preferred. However, it will evaluate its
233arguments multiple times.
234.PP
235Once the bin list has been found, it's fairly easy to search for an
236exact match. A simple search might look something like this:
237.VS
d056fbdf 238.ta 2n +2n +2n 20m
03d53b73 239item *lookup(item_table *t, const char *k)
240{
d056fbdf
MW
241 uint32 h = hash(k); /* Hash \fIk\fP somehow */
242 hash_base **bin = HASH_BIN(&t->t, h);
243 hash_base *b;
244 for (b = *bin; b; b = b->next) {
245 item *i = (item *)b;
246 if (h == i->b.hash && strcmp(k, i->k) == 0)
247 return (i);
248 }
249 return (0);
03d53b73 250}
251.VE
252Insertion is also relatively trivial given the bin list head. Insertion
253may make the hashtable too large, so it might be necessary to extend
254it. Extension is performed by
255.BR hash_extend ,
256which is passed only the address of the hashtable. It returns nonzero
257if extension was successful.
258.VS
d056fbdf 259.ta 2n +2n
03d53b73 260item *add(item_table *t, const char *k, /* ... */)
261{
d056fbdf
MW
262 item *i;
263 uint32 h;
264 hash_base **bin;
265.VP
266 /* --- See if the item is already there --- */
267.VP
268 if ((i = lookup(t, k)) != 0)
269 return (i);
270.VP
271 /* --- Make a new hashtable item --- */
272.VP
273 i = CREATE(item);
274 i->k = xstrdup(k);
275 /* ... */
276.VP
277 /* --- Link it into the bin list --- */
278.VP
279 h = i->b.hash = hash(k);
280 bin = HASH_BIN(&t->t, h);
281 i->b.next = *bin;
282 *bin = &i->b.next;
283.VP
284 /* --- Maybe extend the hashtable --- */
285.VP
286 if (t->load)
287 t->load--;
288 else if (hash_extend(&t->t))
289 t->load = recalc_load(t);
290.VP
291 /* --- Done --- */
292.VP
293 return (i);
03d53b73 294}
295.VE
296The
297.B sym
298implementation is rather more sophisticated in its approach but the idea
299is the same. In particular,
300.B sym
301provides a single interface for lookup and insertion which prevents the
302unnecessary rehashing performed by the above code.
303.PP
304Removal of items is more straightforward. The function
305.B hash_remove
306will unlink a given item from its bin list, after which point it is safe
307to remove.
c4ccbbf9 308.
03d53b73 309.SS "Iteration"
310Iteration allows code to be performed on all the items in a hashtable.
311This is done using an
312.I iterator
313object, represented by a
314.B hash_iter
315structure. An iterator is initialized by calling
316.BR hash_mkiter .
317Each call to
318.B hash_next
319yields a different item from the hashtable until there are none left, a
320condition signified by a null return value.
321.PP
322The macros
323.B HASH_MKITER
324and
325.B HASH_NEXT
326do the same jobs as the above functions. However,
327.B HASH_NEXT
328has a slightly bizarre argument passing convention: its second argument
329is an
330.I lvalue
331which is updated to contain the address of the next item.
332.PP
333The finalization code above contained an example of iteration.
c4ccbbf9
MW
334.
335.\"--------------------------------------------------------------------------
03d53b73 336.SH "SEE ALSO"
c4ccbbf9 337.
03d53b73 338.BR sym (3),
339.BR mLib (3).
c4ccbbf9
MW
340.
341.\"--------------------------------------------------------------------------
03d53b73 342.SH "AUTHOR"
c4ccbbf9 343.
9b5ac6ff 344Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
345.
346.\"----- That's all, folks --------------------------------------------------