@@@ much mess, mostly manpages
[mLib] / struct / sym.3.in
CommitLineData
b6b9d458 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for symbol table
4.\"
5.\" (c) 1999, 2001, 2003, 2005, 2007, 2009, 2023, 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH sym 3mLib "8 May 1999" "Straylight/Edgeware" "mLib utilities library"
08da152e 32.\" @sym_create
33.\" @sym_destroy
34.\" @sym_find
35.\" @sym_remove
36.\" @sym_mkiter
37.\" @sym_next
c4ccbbf9 38.
08da152e 39.\" @SYM_NAME
0c404077 40.\" @SYM_LEN
41.\" @SYM_HASH
c4ccbbf9
MW
42.
43.\"--------------------------------------------------------------------------
44.SH NAME
45sym \- symbol table manager
46.
47.\"--------------------------------------------------------------------------
b6b9d458 48.SH SYNOPSIS
c4ccbbf9 49.
b6b9d458 50.nf
51.B "#include <mLib/sym.h>"
d056fbdf 52.PP
4729aa69
MW
53.B "type struct { ...\& } sym_table;"
54.B "type struct { ...\& } sym_base;"
55.B "type struct { ...\& } sym_iter;"
d056fbdf 56.PP
b6b9d458 57.BI "void sym_create(sym_table *" t );
58.BI "void sym_destroy(sym_table *" t );
d056fbdf 59.PP
adec5584
MW
60.ta \w'\fBvoid *sym_find('u
61.BI "void *sym_find(sym_table *" t ,
62.BI " const char *" n ", long " l ,
63.BI " size_t " sz ", unsigned *" f );
b6b9d458 64.BI "void sym_remove(sym_table *" t ", void *" b );
d056fbdf 65.PP
0c404077 66.BI "const char *SYM_NAME(const void *" p );
67.BI "size_t SYM_LEN(const void *" p );
68.BI "uint32 SYM_HASH(const void *" p );
d056fbdf 69.PP
b6b9d458 70.BI "void sym_mkiter(sym_iter *" i ", sym_table *" t );
71.BI "void *sym_next(sym_iter *" i );
72.fi
c4ccbbf9
MW
73.
74.\"--------------------------------------------------------------------------
0c404077 75.SH "DESCRIPTION"
c4ccbbf9 76.
b6b9d458 77The
78.B sym
79functions implement a data structure often described as a dictionary, a
80finite map, an associative array, or a symbol table. It associates
81.I values
82with
83.I keys
84such that the value corresponding to a given key can be found quickly.
85Additionally, all stored associations can be enumerated.
86.PP
87The interface provides an
88.I intrusive
89symbol table. The data objects stored in the table must include a small
90header used by the symbol table manager. This reduces the amount of
91pointer fiddling that needs to be done, and in practice doesn't seem to
92be much of a problem. It's also fairly easy to construct a
93non-intrusive interface if you really want one.
94.PP
95There are three main data structures involved in the interface:
96.TP
97.B sym_table
98Keeps track of the information associated with a particular table.
99.TP
100.B sym_base
101The header which must be attached to the front of all the value
102objects.
103.TP
104.B sym_iter
105An iterator object, used for enumerating all of the associations stored
106in a symbol table.
107.PP
108All of the above data structures should be considered
109.IR opaque :
110don't try looking inside. Representations have changed in the past, and
111they may change again in the future.
c4ccbbf9 112.
0c404077 113.SS "Creation and destruction"
b6b9d458 114The
115.B sym_table
116object itself needs to be allocated by the caller. It is initialized by
117passing it to the function
118.BR sym_create .
119After initialization, the table contains no entries.
120.PP
121Initializing a symbol table involves allocating some memory. If this
d2a91066 122allocation fails, an
b6b9d458 123.B EXC_NOMEM
124exception is raised.
125.PP
126When a symbol table is no longer needed, the memory occupied by the
127values and other maintenance structures can be reclaimed by calling
128.BR sym_destroy .
0c404077 129Any bits of user data attached to values should previously have been
130destroyed.
c4ccbbf9 131.
0c404077 132.SS "Adding, searching and removing"
b6b9d458 133Most of the actual work is done by the function
134.BR sym_find .
135It does both lookup and creation, depending on its arguments. To do its
136job, it needs to know the following bits of information:
137.TP
ff76c38f 138.BI "sym_table *" t
b6b9d458 139A pointer to a symbol table to manipulate.
140.TP
ff76c38f 141.BI "const char *" n
b6b9d458 142The address of the
143.I key
144to look up or create. Usually this will be a simple text string,
145although it can actually be any arbitrary binary data.
146.TP
ff76c38f 147.BI "long " l
b6b9d458 148The length of the key. If this is \-1,
149.B sym_find
150assumes that the key is a null-terminated string, and calculates its
0c404077 151length itself. This is entirely equivalent to passing
152.BI strlen( n )\fR.
b6b9d458 153.TP
ff76c38f 154.BI "size_t " sz
b6b9d458 155The size of the value block to allocate if the key could not be found.
156If this is zero, no value is allocated, and a null pointer is returned
157to indicate an unsuccessful lookup.
158.TP
ff76c38f 159.BI "unsigned *" f
b6b9d458 160The address of a `found' flag to set. This is an output parameter. On
161exit,
162.B sym_find
163will set the value of
164.BI * f
165to zero if the key could not be found, or nonzero if it was found. This
166can be used to tell whether the value returned has been newly allocated,
167or whether it was already in the table.
168.PP
0c404077 169A terminating null byte is appended to the copy of the symbol's name in
170memory. This is not considered to be a part of the symbol's name, and
171does not contribute to the name's length as reported by the
172.B SYM_LEN
173macro.
b6b9d458 174.PP
175A symbol can be removed from the table by calling
176.BR sym_remove ,
177passing the symbol table itself, and the value block that needs
178removing.
c4ccbbf9 179.
0c404077 180.SS "Enquiries about symbols"
181Three macros are provided to enable simple enquiries about a symbol.
182Given a pointer
183.I s
184to a symbol table entry,
185.BI SYM_LEN( s )
186returns the length of the symbol's name (excluding any terminating null
d4efbcd9 187byte);
0c404077 188.BI SYM_NAME( s )
189returns a pointer to the symbol's name; and
190.BI SYM_HASH( s )
191returns the symbol's hash value.
c4ccbbf9 192.
0c404077 193.SS "Enumerating symbols"
b6b9d458 194Enumerating the values in a symbol table is fairly simple. Allocate a
195.B sym_iter
196object from somewhere. Attach it to a symbol table by calling
197.BR sym_mkiter ,
198and passing in the addresses of the iterator and the symbol table.
199Then, each call to
200.B sym_next
201will return a different value from the symbol table, until all of them
202have been enumerated, at which point,
203.B sym_next
204returns a null pointer.
205.PP
206It's safe to remove the symbol you've just been returned by
207.BR sym_next .
208However, it's not safe to remove any other symbol. So don't do that.
209.PP
210When you've finished with an iterator, it's safe to just throw it away.
211You don't need to call any functions beforehand.
c4ccbbf9 212.
0c404077 213.SS "Use in practice"
b6b9d458 214In normal use, the keys are simple strings (usually identifiers from
215some language), and the values are nontrivial structures providing
216information about types and values.
217.PP
218In this case, you'd define something like the following structure for
219your values:
220.VS
d056fbdf 221.ta 2 20m
b6b9d458 222typedef struct val {
d056fbdf
MW
223 sym_base _base; /* Symbol header */
224 unsigned type; /* Type of this symbol */
225 int dispoff; /* Which display variable is in */
226 size_t frameoff; /* Offset of variable in frame */
b6b9d458 227} val;
228.VE
229Given a pointer
230.I v
231to a
232.BR val ,
233you can find the variable's name by calling
234.BI SYM_NAME( v )\fR.
235.PP
236You can look up a name in the table by saying something like:
237.VS
d056fbdf 238.ta 2n
b6b9d458 239val *v = sym_find(t, name, -1, 0, 0);
240if (!v)
d056fbdf 241 error("unknown variable `%s'", name);
b6b9d458 242.VE
243You can add in a new variable by saying something like
244.VS
d056fbdf 245.ta 2n
b6b9d458 246unsigned f;
247val *v = sym_find(t, name, -1, sizeof(val), &f);
248if (f)
d056fbdf 249 error("variable `%s' already exists", name);
b6b9d458 250/* fill in v */
251.VE
252You can examine all the variables in your symbol table by saying
253something like:
254.VS
d056fbdf 255.ta 2n
b6b9d458 256sym_iter i;
257val *v;
d056fbdf 258.VP
b6b9d458 259for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; ) {
d056fbdf 260 /* ... */
b6b9d458 261}
262.VE
263That ought to be enough examples to be getting on with.
c4ccbbf9 264.
0c404077 265.SS Implementation
6f444bda 266The symbol table is an extensible hashtable, using the universal hash
267function described in
268.BR unihash (3)
269and the global hashing key. The hash chains are kept very short
270(probably too short, actually). Every time a symbol is found, its block
271is promoted to the front of its bin chain so it gets found faster next
272time.
c4ccbbf9
MW
273.
274.\"--------------------------------------------------------------------------
b6b9d458 275.SH SEE ALSO
c4ccbbf9 276.
0c404077 277.BR hash (3),
08da152e 278.BR mLib (3).
c4ccbbf9
MW
279.
280.\"--------------------------------------------------------------------------
b6b9d458 281.SH AUTHOR
c4ccbbf9 282.
9b5ac6ff 283Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
284.
285.\"----- That's all, folks --------------------------------------------------