3 * $Id: sym.c,v 1.4 1998/01/12 16:46:28 mdw Exp $
5 * Symbol table management
10 /*----- Licensing notice --------------------------------------------------*
12 * This file is part of `become'
14 * `Become' is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * `Become' is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with `become'; if not, write to the Free Software Foundation,
26 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 /*----- Revision history --------------------------------------------------*
32 * Revision 1.4 1998/01/12 16:46:28 mdw
35 * Revision 1.3 1997/08/20 16:22:59 mdw
38 * Revision 1.2 1997/08/04 10:24:25 mdw
39 * Sources placed under CVS control.
41 * Revision 1.1 1997/07/21 13:47:44 mdw
46 /*----- Header files ------------------------------------------------------*/
48 /* --- ANSI headers --- */
54 /* --- Local headers --- */
59 /*----- Tuning parameters -------------------------------------------------*/
61 /* --- Initial hash table size --- *
63 * This is the initial @mask@ value. It must be of the form %$2^n - 1$%,
64 * so that it can be used to mask of the bottom bits of a hash value.
67 #define sym__iSize 255 /* Size of a new hash table */
69 /* --- Maximum load factor --- *
71 * This parameter controls how much the table has to be loaded before the
72 * table is extended. The number of elements %$n$%, the number of bins %$b$%
73 * and the limit %$l$% satisfy the relation %$n < bl$%; if a new item is
74 * added to the table and this relation is found to be false, the table is
77 * The parameter @sym__limFactor@ is the value of %$l$% multiplied by 256;
78 * fixed point arithmetic is used to calculate the allowable load. Note
79 * that the exact calculation of this criterion isn't important; what's more
80 * significant is that the parameter allows tuning of the rehashing process.
82 * The current value is %$3 \over 4$%, which appears to be reasonable on the
86 #define sym__limFactor 0x0C0 /* Load factor for growing table */
88 /*----- CRC table ---------------------------------------------------------*/
90 /*****************************************************************/
92 /* CRC LOOKUP TABLE */
93 /* ================ */
95 /* The following CRC lookup table was generated automagically */
96 /* by `crcgen', which is based on the Rocksoft^tm Model CRC */
97 /* Algorithm Table Generation Program. The model parameters */
98 /* supplied to `crcgen' were: */
100 /* Width : 32 bits */
101 /* Poly : 0x04C11DB7 */
102 /* Init : 0xFFFFFFFF */
103 /* XorOut : 0xFFFFFFFF */
105 /* Check : 0xCBF43926 */
107 /* For more information on the Rocksoft^tm Model CRC Algorithm, */
108 /* see the document titled `A Painless Guide to CRC Error */
109 /* Detection Algorithms' by Ross Williams */
110 /* (ross@@guest.adelaide.edu.au.). This document is likely to be */
111 /* in the FTP archive `ftp.adelaide.edu.au/pub/rocksoft'. */
113 /*****************************************************************/
115 static unsigned long sym__crcTable
[256] = {
116 0x00000000L
, 0x77073096L
, 0xEE0E612CL
, 0x990951BAL
,
117 0x076DC419L
, 0x706AF48FL
, 0xE963A535L
, 0x9E6495A3L
,
118 0x0EDB8832L
, 0x79DCB8A4L
, 0xE0D5E91EL
, 0x97D2D988L
,
119 0x09B64C2BL
, 0x7EB17CBDL
, 0xE7B82D07L
, 0x90BF1D91L
,
120 0x1DB71064L
, 0x6AB020F2L
, 0xF3B97148L
, 0x84BE41DEL
,
121 0x1ADAD47DL
, 0x6DDDE4EBL
, 0xF4D4B551L
, 0x83D385C7L
,
122 0x136C9856L
, 0x646BA8C0L
, 0xFD62F97AL
, 0x8A65C9ECL
,
123 0x14015C4FL
, 0x63066CD9L
, 0xFA0F3D63L
, 0x8D080DF5L
,
124 0x3B6E20C8L
, 0x4C69105EL
, 0xD56041E4L
, 0xA2677172L
,
125 0x3C03E4D1L
, 0x4B04D447L
, 0xD20D85FDL
, 0xA50AB56BL
,
126 0x35B5A8FAL
, 0x42B2986CL
, 0xDBBBC9D6L
, 0xACBCF940L
,
127 0x32D86CE3L
, 0x45DF5C75L
, 0xDCD60DCFL
, 0xABD13D59L
,
128 0x26D930ACL
, 0x51DE003AL
, 0xC8D75180L
, 0xBFD06116L
,
129 0x21B4F4B5L
, 0x56B3C423L
, 0xCFBA9599L
, 0xB8BDA50FL
,
130 0x2802B89EL
, 0x5F058808L
, 0xC60CD9B2L
, 0xB10BE924L
,
131 0x2F6F7C87L
, 0x58684C11L
, 0xC1611DABL
, 0xB6662D3DL
,
132 0x76DC4190L
, 0x01DB7106L
, 0x98D220BCL
, 0xEFD5102AL
,
133 0x71B18589L
, 0x06B6B51FL
, 0x9FBFE4A5L
, 0xE8B8D433L
,
134 0x7807C9A2L
, 0x0F00F934L
, 0x9609A88EL
, 0xE10E9818L
,
135 0x7F6A0DBBL
, 0x086D3D2DL
, 0x91646C97L
, 0xE6635C01L
,
136 0x6B6B51F4L
, 0x1C6C6162L
, 0x856530D8L
, 0xF262004EL
,
137 0x6C0695EDL
, 0x1B01A57BL
, 0x8208F4C1L
, 0xF50FC457L
,
138 0x65B0D9C6L
, 0x12B7E950L
, 0x8BBEB8EAL
, 0xFCB9887CL
,
139 0x62DD1DDFL
, 0x15DA2D49L
, 0x8CD37CF3L
, 0xFBD44C65L
,
140 0x4DB26158L
, 0x3AB551CEL
, 0xA3BC0074L
, 0xD4BB30E2L
,
141 0x4ADFA541L
, 0x3DD895D7L
, 0xA4D1C46DL
, 0xD3D6F4FBL
,
142 0x4369E96AL
, 0x346ED9FCL
, 0xAD678846L
, 0xDA60B8D0L
,
143 0x44042D73L
, 0x33031DE5L
, 0xAA0A4C5FL
, 0xDD0D7CC9L
,
144 0x5005713CL
, 0x270241AAL
, 0xBE0B1010L
, 0xC90C2086L
,
145 0x5768B525L
, 0x206F85B3L
, 0xB966D409L
, 0xCE61E49FL
,
146 0x5EDEF90EL
, 0x29D9C998L
, 0xB0D09822L
, 0xC7D7A8B4L
,
147 0x59B33D17L
, 0x2EB40D81L
, 0xB7BD5C3BL
, 0xC0BA6CADL
,
148 0xEDB88320L
, 0x9ABFB3B6L
, 0x03B6E20CL
, 0x74B1D29AL
,
149 0xEAD54739L
, 0x9DD277AFL
, 0x04DB2615L
, 0x73DC1683L
,
150 0xE3630B12L
, 0x94643B84L
, 0x0D6D6A3EL
, 0x7A6A5AA8L
,
151 0xE40ECF0BL
, 0x9309FF9DL
, 0x0A00AE27L
, 0x7D079EB1L
,
152 0xF00F9344L
, 0x8708A3D2L
, 0x1E01F268L
, 0x6906C2FEL
,
153 0xF762575DL
, 0x806567CBL
, 0x196C3671L
, 0x6E6B06E7L
,
154 0xFED41B76L
, 0x89D32BE0L
, 0x10DA7A5AL
, 0x67DD4ACCL
,
155 0xF9B9DF6FL
, 0x8EBEEFF9L
, 0x17B7BE43L
, 0x60B08ED5L
,
156 0xD6D6A3E8L
, 0xA1D1937EL
, 0x38D8C2C4L
, 0x4FDFF252L
,
157 0xD1BB67F1L
, 0xA6BC5767L
, 0x3FB506DDL
, 0x48B2364BL
,
158 0xD80D2BDAL
, 0xAF0A1B4CL
, 0x36034AF6L
, 0x41047A60L
,
159 0xDF60EFC3L
, 0xA867DF55L
, 0x316E8EEFL
, 0x4669BE79L
,
160 0xCB61B38CL
, 0xBC66831AL
, 0x256FD2A0L
, 0x5268E236L
,
161 0xCC0C7795L
, 0xBB0B4703L
, 0x220216B9L
, 0x5505262FL
,
162 0xC5BA3BBEL
, 0xB2BD0B28L
, 0x2BB45A92L
, 0x5CB36A04L
,
163 0xC2D7FFA7L
, 0xB5D0CF31L
, 0x2CD99E8BL
, 0x5BDEAE1DL
,
164 0x9B64C2B0L
, 0xEC63F226L
, 0x756AA39CL
, 0x026D930AL
,
165 0x9C0906A9L
, 0xEB0E363FL
, 0x72076785L
, 0x05005713L
,
166 0x95BF4A82L
, 0xE2B87A14L
, 0x7BB12BAEL
, 0x0CB61B38L
,
167 0x92D28E9BL
, 0xE5D5BE0DL
, 0x7CDCEFB7L
, 0x0BDBDF21L
,
168 0x86D3D2D4L
, 0xF1D4E242L
, 0x68DDB3F8L
, 0x1FDA836EL
,
169 0x81BE16CDL
, 0xF6B9265BL
, 0x6FB077E1L
, 0x18B74777L
,
170 0x88085AE6L
, 0xFF0F6A70L
, 0x66063BCAL
, 0x11010B5CL
,
171 0x8F659EFFL
, 0xF862AE69L
, 0x616BFFD3L
, 0x166CCF45L
,
172 0xA00AE278L
, 0xD70DD2EEL
, 0x4E048354L
, 0x3903B3C2L
,
173 0xA7672661L
, 0xD06016F7L
, 0x4969474DL
, 0x3E6E77DBL
,
174 0xAED16A4AL
, 0xD9D65ADCL
, 0x40DF0B66L
, 0x37D83BF0L
,
175 0xA9BCAE53L
, 0xDEBB9EC5L
, 0x47B2CF7FL
, 0x30B5FFE9L
,
176 0xBDBDF21CL
, 0xCABAC28AL
, 0x53B39330L
, 0x24B4A3A6L
,
177 0xBAD03605L
, 0xCDD70693L
, 0x54DE5729L
, 0x23D967BFL
,
178 0xB3667A2EL
, 0xC4614AB8L
, 0x5D681B02L
, 0x2A6F2B94L
,
179 0xB40BBE37L
, 0xC30C8EA1L
, 0x5A05DF1BL
, 0x2D02EF8DL
,
182 /*****************************************************************/
183 /* End of CRC Lookup Table */
184 /*****************************************************************/
186 /* --- Work out the CRC of a bytestring --- */
188 #define sym__crc(s_, l_, hv_) do { \
189 unsigned long h_ = 0xFFFFFFFFL; \
190 const char *p_ = s_; \
191 const char *e_ = (s_) + (l_); \
194 h_ = (h_ >> 8) ^ (sym__crcTable[(*p_++ ^ h_) & 0xFF]); \
195 hv_ = ~h_ & 0xFFFFFFFFL; \
198 /*----- Main code ---------------------------------------------------------*/
202 /* --- @sym_createTable@ --- *
204 * Arguments: @sym_table *t@ = symbol table to initialise
208 * Use: Initialises the given symbol table.
211 void sym_createTable(sym_table
*t
)
215 t
->mask
= sym__iSize
;
216 t
->c
= (sym__iSize
* sym__limFactor
) >> 8;
217 t
->a
= xmalloc((t
->mask
+ 1) * sizeof(sym_base
*));
219 for (i
= 0; i
< sym__iSize
+ 1; i
++)
223 /* --- @sym_destroyTable@ --- *
225 * Arguments: @sym_table *t@ = pointer to symbol table in question
229 * Use: Destroys a symbol table, freeing all the memory it used to
233 void sym_destroyTable(sym_table
*t
)
238 for (i
= 0; i
<= t
->mask
; i
++) {
250 /* --- @sym_find@ --- *
252 * Arguments: @sym_table *t@ = pointer to symbol table in question
253 * @const char *n@ = pointer to symbol table to look up
254 * @long l@ = length of the name string or negative to measure
255 * @size_t sz@ = size of desired symbol object, or zero
256 * @unsigned *f@ = pointer to a flag, or null.
258 * Returns: The address of a @sym_base@ structure, or null if not found
261 * Use: Looks up a symbol in a given symbol table. The name is
262 * passed by the address of its first character. The length
263 * may be given, in which case the name may contain arbitrary
264 * binary data, or it may be given as a negative number, in
265 * which case the length of the name is calculated as
268 * The return value is the address of a pointer to a @sym_base@
269 * block (which may have other things on the end, as above). If
270 * the symbol could be found, the return value points to the
271 * symbol block. If the symbol wasn't there, then if @sz@ is
272 * nonzero, a new symbol is created and its address is returned;
273 * otherwise a null pointer is returned.
275 * The value of @*f@ indicates whether a new symbol entry was
276 * created: a nonzero value indicates that an old value was
280 void *sym_find(sym_table
*t
, const char *n
, long l
, size_t sz
, unsigned *f
)
282 unsigned long hash
; /* Hash value for user's name */
283 size_t len
= l
< 0 ?
strlen(n
) + 1 : l
; /* Find length of user's name */
284 sym_base
*bin
; /* Bin containing our item */
285 sym_base
*p
, *q
; /* Pointer wandering through list */
287 /* --- Find the correct bin --- */
289 sym__crc(n
, len
, hash
); /* Find hash value for this name */
290 bin
= p
= (sym_base
*)(t
->a
+ (hash
& t
->mask
));
292 /* --- Search the bin list --- */
295 if (hash
== p
->next
->hash
&& /* Check hash values first */
296 len
== p
->next
->len
&& /* Then compare string lengths */
297 !memcmp(n
, p
->next
->name
, len
)) /* And finally compare the strings */
299 /* --- Found a match --- *
301 * As a minor, and probably pointless, tweak, move the item to the
302 * front of its bin list.
305 q
= p
->next
; /* Find the actual symbol block */
306 p
->next
= q
->next
; /* Extract block from bin list */
307 q
->next
= bin
->next
; /* Set up symbol's next pointer */
308 bin
->next
= q
; /* And reinsert the block */
310 /* --- Return the block --- */
312 if (f
) *f
= 1; /* Didn't fail to find the item */
313 return (q
); /* And return the block */
316 p
= p
->next
; /* Move onto the next item */
319 /* --- Couldn't find the item there --- */
321 if (f
) *f
= 0; /* Failed to find the block */
322 if (!sz
) return (0); /* Return zero if not creating */
324 /* --- Create a new symbol block and initialise it --- */
326 p
= xmalloc(sz
); /* Create a new symbol block */
327 p
->next
= bin
->next
; /* Set up the next pointer */
328 p
->hash
= hash
; /* Set up the hash value too */
329 p
->name
= xmalloc(len
); /* Allocate a block for the name */
330 p
->len
= len
; /* And set up the string length */
331 memcpy(p
->name
, n
, len
); /* And copy the string over */
333 bin
->next
= p
; /* Put the pointer into the bin */
335 /* --- Consider growing the array --- */
338 unsigned long m
= t
->mask
+ 1; /* Next set bit in has word */
339 sym_base
*p
, *q
, *r
; /* More useful pointers */
340 size_t i
, lim
; /* Loop counter and limit */
342 /* --- Update values in the anchor block --- */
344 t
->c
= ((t
->mask
+ 1) * sym__limFactor
) >> 8; /* Set load value */
345 t
->mask
= (t
->mask
+ 1) * 2 - 1; /* Set the new mask value */
346 t
->a
= xrealloc(t
->a
, (t
->mask
+ 1) * sizeof(sym_base
*));
348 /* --- Now wander through the table rehashing things --- *
350 * This loop is very careful to avoid problems with aliasing. The items
351 * are dealt with from the end backwards to avoid overwriting bins before
352 * they've been processed.
355 lim
= (t
->mask
+ 1) >> 1;
356 for (i
= 0; i
< lim
; i
++) {
357 /* --- Some initialisation --- */
359 r
= t
->a
[i
]; /* Find the list we're dissecting */
360 p
= (sym_base
*)(t
->a
+ i
); /* Find bit-clear list */
361 q
= (sym_base
*)(t
->a
+ i
+ lim
); /* And the bit-set lsit */
363 /* --- Now go through the @r@ list --- */
366 if (r
->hash
& m
) /* Is the next bit set? */
367 q
= q
->next
= r
; /* Yes, so fit up previous link */
369 p
= p
->next
= r
; /* No, so fit up previous link */
370 r
= r
->next
; /* Move onto the next item */
372 p
->next
= q
->next
= 0; /* Null terminate the new lists */
376 /* --- Finished that, so return the new symbol block --- */
381 /* --- @sym_remove@ --- *
383 * Arguments: @sym_table *i@ = pointer to a symbol table object
384 * @void *b@ = pointer to symbol table entry
388 * Use: Removes the object from the symbol table. The space occupied
389 * by the object and its name is freed; anything else attached
390 * to the entry should already be gone by this point.
393 void sym_remove(sym_table
*t
, void *b
)
395 /* --- A quick comment --- *
397 * Since the @sym_base@ block contains the hash, finding the element in the
398 * bin list is really quick -- it's not worth bothering with things like
399 * doubly linked lists.
403 sym_base
*bin
= (sym_base
*)(t
->a
+ (p
->hash
& t
->mask
));
405 /* --- Find the item in the bin list --- */
415 /* --- Now just remove the item from the list and free it --- *
417 * Oh, and bump the load counter.
426 /* --- @sym_createIter@ --- *
428 * Arguments: @sym_iter *i@ = pointer to an iterator object
429 * @sym_table *t@ = pointer to a symbol table object
433 * Use: Creates a new symbol table iterator which may be used to
434 * iterate through a symbol table.
437 void sym_createIter(sym_iter
*i
, sym_table
*t
)
444 /* --- @sym_next@ --- *
446 * Arguments: @sym_iter *i@ = pointer to iterator object
448 * Returns: Pointer to the next symbol found, or null when finished.
450 * Use: Returns the next symbol from the table. Symbols are not
451 * returned in any particular order.
454 void *sym_next(sym_iter
*i
)
458 /* --- Find the next item --- */
461 if (i
->i
> i
->t
->mask
)
463 i
->n
= i
->t
->a
[i
->i
++];
466 /* --- Update the iterator block --- */
478 /*----- CRC test code -----------------------------------------------------*/
485 sym__crc("123456789", 9, h
);
486 printf("crc check == %04x\n", h
);
492 /*----- Symbol table test code --------------------------------------------*/
501 typedef struct sym_word
{
507 /* --- What it does --- *
509 * Reads the file /usr/dict/words (change to some other file full of
510 * interesting and appropriate bits of text to taste) into a big buffer and
511 * picks apart into lines. Then picks lines at random and enters them into
517 char *buff
, *p
, *lim
;
526 /* --- Initialise for reading the file --- */
529 buff
= xmalloc(sz
+ 1);
532 if ((fp
= fopen("/usr/dict/words", "r")) == 0)
533 fprintf(stderr
, "buggered ;-( (%s)\n", strerror(errno
));
535 /* --- Read buffers of text --- *
537 * Read a buffer. If more to come, double the buffer size and try again.
538 * This is the method I recommended to comp.lang.c, so I may as well try
543 i
= fread(buff
+ done
, 1, sz
- done
, fp
);
548 buff
= xrealloc(buff
, sz
+ 1);
551 /* --- Count the lines --- */
556 for (p
= buff
; p
< lim
; p
++)
557 if (*p
== '\n') sz
++;
559 /* --- Build a table of line starts --- */
561 line
= xmalloc(sz
* sizeof(char *));
564 for (p
= buff
; p
< lim
; p
++)
565 if (*p
== '\n') *p
= 0, line
[i
++] = p
+ 1;
568 /* --- Build a table of lines --- *
570 * This reverses the mapping which the symbol table performs, so that its
571 * accuracy can be tested.
574 flag
= xmalloc(sz
* sizeof(sym_word
*));
575 for (i
= 0; i
< sz
; i
++)
579 sym_createTable(&tbl
);
582 i
= (unsigned)rand() % sz
;
589 /* printf("find `%s'\n", line[i]); */
590 if ((rand() & 1023) == 0) {
591 putchar('.'); fflush(stdout
);
594 w
= sym_find(&tbl
, line
[i
], -1, 0, 0);
596 printf("*** error: find `%s' gave %p not %p\n",
597 line
[i
], (void *)w
, (void *)flag
[i
]);
598 else if (w
&& w
->i
!= i
)
599 printf("*** error: find sym for `%s' gives index %i not %i\n",
607 /* printf("create `%s'\n", line[i]); */
608 if ((rand() & 1023) == 0) {
609 putchar('+'); fflush(stdout
);
612 w
= sym_find(&tbl
, line
[i
], -1, sizeof(sym_word
), &f
);
616 printf("*** error: create `%s' gave %p not %p\n",
617 line
[i
], (void *)w
, (void *)flag
[i
]);
618 else if (w
&& w
->i
!= i
)
619 printf("*** error: create sym for `%s' gives index %i not %i\n",
625 printf("*** error: create `%s' gave new block, should be %p\n",
626 line
[i
], (void *)flag
[i
]);
638 int v
= (rand() % entries
) == 0;
641 printf("\niterated %i entries\n", entries
);
646 ntbl
= xmalloc(sz
* sizeof(sym_word
*));
647 memcpy(ntbl
, flag
, sz
* sizeof(sym_word
*));
648 sym_createIter(&it
, &tbl
);
650 while ((w
= sym_next(&it
)) != 0) {
652 printf("*** error: iterate returned duff item %i\n", w
->i
);
657 for (i
= 0; i
< sz
; i
++)
658 if (ntbl
[i
]) printf("*** error: iterate didn't return item %i\n",
665 int v
= rand() & 255 ?
0 : 1;
670 for (i
= 0; i
<= tbl
.mask
; i
++) {
671 if (!tbl
.a
[i
]) continue;
672 if (v
) printf(" %i: ", i
);
675 if ((b
->hash
& tbl
.mask
) != i
)
676 printf("*** error: bad hash value found");
677 if (v
) printf("`%s'(%08lx:%lu) ",
678 line
[((sym_word
*)b
)->i
],
683 if (v
) putchar('\n');
689 /* printf("remove `%s'\n", flag[i]->base.name); */
690 if ((rand() & 1023) == 0) {
691 putchar('-'); fflush(stdout
);
693 sym_remove(&tbl
, flag
[i
]);
707 /*----- That's all, folks -------------------------------------------------*/