Extra diagnostics in the hope that we can track down the problem with
[u/mdw/putty] / tree234.c
1 /*
2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
3 *
4 * This file is copyright 1999-2001 Simon Tatham.
5 *
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
13 * conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
26 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <assert.h>
31
32 #include "tree234.h"
33
34 #define smalloc malloc
35 #define sfree free
36
37 #define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
38
39 #ifdef TEST
40 #define LOG(x) (printf x)
41 #else
42 // FIXME
43 #define LOG(x) (dprintf x)
44 #endif
45
46 typedef struct node234_Tag node234;
47
48 struct tree234_Tag {
49 node234 *root;
50 cmpfn234 cmp;
51 };
52
53 struct node234_Tag {
54 node234 *parent;
55 node234 *kids[4];
56 int counts[4];
57 void *elems[3];
58 };
59
60 /*
61 * Create a 2-3-4 tree.
62 */
63 tree234 *newtree234(cmpfn234 cmp) {
64 tree234 *ret = mknew(tree234);
65 LOG(("created tree %p\n", ret));
66 ret->root = NULL;
67 ret->cmp = cmp;
68 return ret;
69 }
70
71 /*
72 * Free a 2-3-4 tree (not including freeing the elements).
73 */
74 static void freenode234(node234 *n) {
75 if (!n)
76 return;
77 freenode234(n->kids[0]);
78 freenode234(n->kids[1]);
79 freenode234(n->kids[2]);
80 freenode234(n->kids[3]);
81 sfree(n);
82 }
83 void freetree234(tree234 *t) {
84 freenode234(t->root);
85 sfree(t);
86 }
87
88 /*
89 * Internal function to count a node.
90 */
91 static int countnode234(node234 *n) {
92 int count = 0;
93 int i;
94 if (!n)
95 return 0;
96 for (i = 0; i < 4; i++)
97 count += n->counts[i];
98 for (i = 0; i < 3; i++)
99 if (n->elems[i])
100 count++;
101 return count;
102 }
103
104 /*
105 * Count the elements in a tree.
106 */
107 int count234(tree234 *t) {
108 if (t->root)
109 return countnode234(t->root);
110 else
111 return 0;
112 }
113
114 /*
115 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
116 * an existing element compares equal, returns that.
117 */
118 static void *add234_internal(tree234 *t, void *e, int index) {
119 node234 *n, **np, *left, *right;
120 void *orig_e = e;
121 int c, lcount, rcount;
122
123 LOG(("adding node %p to tree %p\n", e, t));
124 if (t->root == NULL) {
125 t->root = mknew(node234);
126 t->root->elems[1] = t->root->elems[2] = NULL;
127 t->root->kids[0] = t->root->kids[1] = NULL;
128 t->root->kids[2] = t->root->kids[3] = NULL;
129 t->root->counts[0] = t->root->counts[1] = 0;
130 t->root->counts[2] = t->root->counts[3] = 0;
131 t->root->parent = NULL;
132 t->root->elems[0] = e;
133 LOG((" created root %p\n", t->root));
134 return orig_e;
135 }
136
137 np = &t->root;
138 while (*np) {
139 int childnum;
140 n = *np;
141 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
142 n,
143 n->kids[0], n->counts[0], n->elems[0],
144 n->kids[1], n->counts[1], n->elems[1],
145 n->kids[2], n->counts[2], n->elems[2],
146 n->kids[3], n->counts[3]));
147 if (index >= 0) {
148 if (!n->kids[0]) {
149 /*
150 * Leaf node. We want to insert at kid position
151 * equal to the index:
152 *
153 * 0 A 1 B 2 C 3
154 */
155 childnum = index;
156 } else {
157 /*
158 * Internal node. We always descend through it (add
159 * always starts at the bottom, never in the
160 * middle).
161 */
162 do { /* this is a do ... while (0) to allow `break' */
163 if (index <= n->counts[0]) {
164 childnum = 0;
165 break;
166 }
167 index -= n->counts[0] + 1;
168 if (index <= n->counts[1]) {
169 childnum = 1;
170 break;
171 }
172 index -= n->counts[1] + 1;
173 if (index <= n->counts[2]) {
174 childnum = 2;
175 break;
176 }
177 index -= n->counts[2] + 1;
178 if (index <= n->counts[3]) {
179 childnum = 3;
180 break;
181 }
182 return NULL; /* error: index out of range */
183 } while (0);
184 }
185 } else {
186 if ((c = t->cmp(e, n->elems[0])) < 0)
187 childnum = 0;
188 else if (c == 0)
189 return n->elems[0]; /* already exists */
190 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
191 childnum = 1;
192 else if (c == 0)
193 return n->elems[1]; /* already exists */
194 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
195 childnum = 2;
196 else if (c == 0)
197 return n->elems[2]; /* already exists */
198 else
199 childnum = 3;
200 }
201 np = &n->kids[childnum];
202 LOG((" moving to child %d (%p)\n", childnum, *np));
203 }
204
205 /*
206 * We need to insert the new element in n at position np.
207 */
208 left = NULL; lcount = 0;
209 right = NULL; rcount = 0;
210 while (n) {
211 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
212 n,
213 n->kids[0], n->counts[0], n->elems[0],
214 n->kids[1], n->counts[1], n->elems[1],
215 n->kids[2], n->counts[2], n->elems[2],
216 n->kids[3], n->counts[3]));
217 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
218 left, lcount, e, right, rcount, np - n->kids));
219 if (n->elems[1] == NULL) {
220 /*
221 * Insert in a 2-node; simple.
222 */
223 if (np == &n->kids[0]) {
224 LOG((" inserting on left of 2-node\n"));
225 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
226 n->elems[1] = n->elems[0];
227 n->kids[1] = right; n->counts[1] = rcount;
228 n->elems[0] = e;
229 n->kids[0] = left; n->counts[0] = lcount;
230 } else { /* np == &n->kids[1] */
231 LOG((" inserting on right of 2-node\n"));
232 n->kids[2] = right; n->counts[2] = rcount;
233 n->elems[1] = e;
234 n->kids[1] = left; n->counts[1] = lcount;
235 }
236 if (n->kids[0]) n->kids[0]->parent = n;
237 if (n->kids[1]) n->kids[1]->parent = n;
238 if (n->kids[2]) n->kids[2]->parent = n;
239 LOG((" done\n"));
240 break;
241 } else if (n->elems[2] == NULL) {
242 /*
243 * Insert in a 3-node; simple.
244 */
245 if (np == &n->kids[0]) {
246 LOG((" inserting on left of 3-node\n"));
247 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
248 n->elems[2] = n->elems[1];
249 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
250 n->elems[1] = n->elems[0];
251 n->kids[1] = right; n->counts[1] = rcount;
252 n->elems[0] = e;
253 n->kids[0] = left; n->counts[0] = lcount;
254 } else if (np == &n->kids[1]) {
255 LOG((" inserting in middle of 3-node\n"));
256 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
257 n->elems[2] = n->elems[1];
258 n->kids[2] = right; n->counts[2] = rcount;
259 n->elems[1] = e;
260 n->kids[1] = left; n->counts[1] = lcount;
261 } else { /* np == &n->kids[2] */
262 LOG((" inserting on right of 3-node\n"));
263 n->kids[3] = right; n->counts[3] = rcount;
264 n->elems[2] = e;
265 n->kids[2] = left; n->counts[2] = lcount;
266 }
267 if (n->kids[0]) n->kids[0]->parent = n;
268 if (n->kids[1]) n->kids[1]->parent = n;
269 if (n->kids[2]) n->kids[2]->parent = n;
270 if (n->kids[3]) n->kids[3]->parent = n;
271 LOG((" done\n"));
272 break;
273 } else {
274 node234 *m = mknew(node234);
275 m->parent = n->parent;
276 LOG((" splitting a 4-node; created new node %p\n", m));
277 /*
278 * Insert in a 4-node; split into a 2-node and a
279 * 3-node, and move focus up a level.
280 *
281 * I don't think it matters which way round we put the
282 * 2 and the 3. For simplicity, we'll put the 3 first
283 * always.
284 */
285 if (np == &n->kids[0]) {
286 m->kids[0] = left; m->counts[0] = lcount;
287 m->elems[0] = e;
288 m->kids[1] = right; m->counts[1] = rcount;
289 m->elems[1] = n->elems[0];
290 m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1];
291 e = n->elems[1];
292 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
293 n->elems[0] = n->elems[2];
294 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
295 } else if (np == &n->kids[1]) {
296 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
297 m->elems[0] = n->elems[0];
298 m->kids[1] = left; m->counts[1] = lcount;
299 m->elems[1] = e;
300 m->kids[2] = right; m->counts[2] = rcount;
301 e = n->elems[1];
302 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
303 n->elems[0] = n->elems[2];
304 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
305 } else if (np == &n->kids[2]) {
306 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
307 m->elems[0] = n->elems[0];
308 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
309 m->elems[1] = n->elems[1];
310 m->kids[2] = left; m->counts[2] = lcount;
311 /* e = e; */
312 n->kids[0] = right; n->counts[0] = rcount;
313 n->elems[0] = n->elems[2];
314 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
315 } else { /* np == &n->kids[3] */
316 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
317 m->elems[0] = n->elems[0];
318 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
319 m->elems[1] = n->elems[1];
320 m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2];
321 n->kids[0] = left; n->counts[0] = lcount;
322 n->elems[0] = e;
323 n->kids[1] = right; n->counts[1] = rcount;
324 e = n->elems[2];
325 }
326 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
327 m->counts[3] = n->counts[3] = n->counts[2] = 0;
328 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
329 if (m->kids[0]) m->kids[0]->parent = m;
330 if (m->kids[1]) m->kids[1]->parent = m;
331 if (m->kids[2]) m->kids[2]->parent = m;
332 if (n->kids[0]) n->kids[0]->parent = n;
333 if (n->kids[1]) n->kids[1]->parent = n;
334 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
335 m->kids[0], m->counts[0], m->elems[0],
336 m->kids[1], m->counts[1], m->elems[1],
337 m->kids[2], m->counts[2]));
338 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
339 n->kids[0], n->counts[0], n->elems[0],
340 n->kids[1], n->counts[1]));
341 left = m; lcount = countnode234(left);
342 right = n; rcount = countnode234(right);
343 }
344 if (n->parent)
345 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
346 n->parent->kids[1] == n ? &n->parent->kids[1] :
347 n->parent->kids[2] == n ? &n->parent->kids[2] :
348 &n->parent->kids[3]);
349 n = n->parent;
350 }
351
352 /*
353 * If we've come out of here by `break', n will still be
354 * non-NULL and all we need to do is go back up the tree
355 * updating counts. If we've come here because n is NULL, we
356 * need to create a new root for the tree because the old one
357 * has just split into two. */
358 if (n) {
359 while (n->parent) {
360 int count = countnode234(n);
361 int childnum;
362 childnum = (n->parent->kids[0] == n ? 0 :
363 n->parent->kids[1] == n ? 1 :
364 n->parent->kids[2] == n ? 2 : 3);
365 n->parent->counts[childnum] = count;
366 n = n->parent;
367 }
368 } else {
369 LOG((" root is overloaded, split into two\n"));
370 t->root = mknew(node234);
371 t->root->kids[0] = left; t->root->counts[0] = lcount;
372 t->root->elems[0] = e;
373 t->root->kids[1] = right; t->root->counts[1] = rcount;
374 t->root->elems[1] = NULL;
375 t->root->kids[2] = NULL; t->root->counts[2] = 0;
376 t->root->elems[2] = NULL;
377 t->root->kids[3] = NULL; t->root->counts[3] = 0;
378 t->root->parent = NULL;
379 if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
380 if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
381 LOG((" new root is %p/%d [%p] %p/%d\n",
382 t->root->kids[0], t->root->counts[0],
383 t->root->elems[0],
384 t->root->kids[1], t->root->counts[1]));
385 }
386
387 return orig_e;
388 }
389
390 void *add234(tree234 *t, void *e) {
391 if (!t->cmp) /* tree is unsorted */
392 return NULL;
393
394 return add234_internal(t, e, -1);
395 }
396 void *addpos234(tree234 *t, void *e, int index) {
397 if (index < 0 || /* index out of range */
398 t->cmp) /* tree is sorted */
399 return NULL; /* return failure */
400
401 return add234_internal(t, e, index); /* this checks the upper bound */
402 }
403
404 /*
405 * Look up the element at a given numeric index in a 2-3-4 tree.
406 * Returns NULL if the index is out of range.
407 */
408 void *index234(tree234 *t, int index) {
409 node234 *n;
410
411 if (!t->root)
412 return NULL; /* tree is empty */
413
414 if (index < 0 || index >= countnode234(t->root))
415 return NULL; /* out of range */
416
417 n = t->root;
418
419 while (n) {
420 if (index < n->counts[0])
421 n = n->kids[0];
422 else if (index -= n->counts[0] + 1, index < 0)
423 return n->elems[0];
424 else if (index < n->counts[1])
425 n = n->kids[1];
426 else if (index -= n->counts[1] + 1, index < 0)
427 return n->elems[1];
428 else if (index < n->counts[2])
429 n = n->kids[2];
430 else if (index -= n->counts[2] + 1, index < 0)
431 return n->elems[2];
432 else
433 n = n->kids[3];
434 }
435
436 /* We shouldn't ever get here. I wonder how we did. */
437 return NULL;
438 }
439
440 /*
441 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
442 * found. e is always passed as the first argument to cmp, so cmp
443 * can be an asymmetric function if desired. cmp can also be passed
444 * as NULL, in which case the compare function from the tree proper
445 * will be used.
446 */
447 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
448 int relation, int *index) {
449 node234 *n;
450 void *ret;
451 int c;
452 int idx, ecount, kcount, cmpret;
453
454 if (t->root == NULL)
455 return NULL;
456
457 if (cmp == NULL)
458 cmp = t->cmp;
459
460 n = t->root;
461 /*
462 * Attempt to find the element itself.
463 */
464 idx = 0;
465 ecount = -1;
466 /*
467 * Prepare a fake `cmp' result if e is NULL.
468 */
469 cmpret = 0;
470 if (e == NULL) {
471 assert(relation == REL234_LT || relation == REL234_GT);
472 if (relation == REL234_LT)
473 cmpret = +1; /* e is a max: always greater */
474 else if (relation == REL234_GT)
475 cmpret = -1; /* e is a min: always smaller */
476 }
477 while (1) {
478 for (kcount = 0; kcount < 4; kcount++) {
479 if (kcount >= 3 || n->elems[kcount] == NULL ||
480 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
481 break;
482 }
483 if (n->kids[kcount]) idx += n->counts[kcount];
484 if (c == 0) {
485 ecount = kcount;
486 break;
487 }
488 idx++;
489 }
490 if (ecount >= 0)
491 break;
492 if (n->kids[kcount])
493 n = n->kids[kcount];
494 else
495 break;
496 }
497
498 if (ecount >= 0) {
499 /*
500 * We have found the element we're looking for. It's
501 * n->elems[ecount], at tree index idx. If our search
502 * relation is EQ, LE or GE we can now go home.
503 */
504 if (relation != REL234_LT && relation != REL234_GT) {
505 if (index) *index = idx;
506 return n->elems[ecount];
507 }
508
509 /*
510 * Otherwise, we'll do an indexed lookup for the previous
511 * or next element. (It would be perfectly possible to
512 * implement these search types in a non-counted tree by
513 * going back up from where we are, but far more fiddly.)
514 */
515 if (relation == REL234_LT)
516 idx--;
517 else
518 idx++;
519 } else {
520 /*
521 * We've found our way to the bottom of the tree and we
522 * know where we would insert this node if we wanted to:
523 * we'd put it in in place of the (empty) subtree
524 * n->kids[kcount], and it would have index idx
525 *
526 * But the actual element isn't there. So if our search
527 * relation is EQ, we're doomed.
528 */
529 if (relation == REL234_EQ)
530 return NULL;
531
532 /*
533 * Otherwise, we must do an index lookup for index idx-1
534 * (if we're going left - LE or LT) or index idx (if we're
535 * going right - GE or GT).
536 */
537 if (relation == REL234_LT || relation == REL234_LE) {
538 idx--;
539 }
540 }
541
542 /*
543 * We know the index of the element we want; just call index234
544 * to do the rest. This will return NULL if the index is out of
545 * bounds, which is exactly what we want.
546 */
547 ret = index234(t, idx);
548 if (ret && index) *index = idx;
549 return ret;
550 }
551 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
552 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
553 }
554 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
555 return findrelpos234(t, e, cmp, relation, NULL);
556 }
557 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
558 return findrelpos234(t, e, cmp, REL234_EQ, index);
559 }
560
561 /*
562 * Delete an element e in a 2-3-4 tree. Does not free the element,
563 * merely removes all links to it from the tree nodes.
564 */
565 static void *delpos234_internal(tree234 *t, int index) {
566 node234 *n;
567 void *retval;
568 int ei = -1;
569
570 retval = 0;
571
572 n = t->root;
573 LOG(("deleting item %d from tree %p\n", index, t));
574 while (1) {
575 while (n) {
576 int ki;
577 node234 *sub;
578
579 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
580 n,
581 n->kids[0], n->counts[0], n->elems[0],
582 n->kids[1], n->counts[1], n->elems[1],
583 n->kids[2], n->counts[2], n->elems[2],
584 n->kids[3], n->counts[3],
585 index));
586 if (index < n->counts[0]) {
587 ki = 0;
588 } else if (index -= n->counts[0]+1, index < 0) {
589 ei = 0; break;
590 } else if (index < n->counts[1]) {
591 ki = 1;
592 } else if (index -= n->counts[1]+1, index < 0) {
593 ei = 1; break;
594 } else if (index < n->counts[2]) {
595 ki = 2;
596 } else if (index -= n->counts[2]+1, index < 0) {
597 ei = 2; break;
598 } else {
599 ki = 3;
600 }
601 /*
602 * Recurse down to subtree ki. If it has only one element,
603 * we have to do some transformation to start with.
604 */
605 LOG((" moving to subtree %d\n", ki));
606 sub = n->kids[ki];
607 if (!sub->elems[1]) {
608 LOG((" subtree has only one element!\n", ki));
609 if (ki > 0 && n->kids[ki-1]->elems[1]) {
610 /*
611 * Case 3a, left-handed variant. Child ki has
612 * only one element, but child ki-1 has two or
613 * more. So we need to move a subtree from ki-1
614 * to ki.
615 *
616 * . C . . B .
617 * / \ -> / \
618 * [more] a A b B c d D e [more] a A b c C d D e
619 */
620 node234 *sib = n->kids[ki-1];
621 int lastelem = (sib->elems[2] ? 2 :
622 sib->elems[1] ? 1 : 0);
623 sub->kids[2] = sub->kids[1];
624 sub->counts[2] = sub->counts[1];
625 sub->elems[1] = sub->elems[0];
626 sub->kids[1] = sub->kids[0];
627 sub->counts[1] = sub->counts[0];
628 sub->elems[0] = n->elems[ki-1];
629 sub->kids[0] = sib->kids[lastelem+1];
630 sub->counts[0] = sib->counts[lastelem+1];
631 if (sub->kids[0]) sub->kids[0]->parent = sub;
632 n->elems[ki-1] = sib->elems[lastelem];
633 sib->kids[lastelem+1] = NULL;
634 sib->counts[lastelem+1] = 0;
635 sib->elems[lastelem] = NULL;
636 n->counts[ki] = countnode234(sub);
637 LOG((" case 3a left\n"));
638 LOG((" index and left subtree count before adjustment: %d, %d\n",
639 index, n->counts[ki-1]));
640 index += n->counts[ki-1];
641 n->counts[ki-1] = countnode234(sib);
642 index -= n->counts[ki-1];
643 LOG((" index and left subtree count after adjustment: %d, %d\n",
644 index, n->counts[ki-1]));
645 } else if (ki < 3 && n->kids[ki+1] &&
646 n->kids[ki+1]->elems[1]) {
647 /*
648 * Case 3a, right-handed variant. ki has only
649 * one element but ki+1 has two or more. Move a
650 * subtree from ki+1 to ki.
651 *
652 * . B . . C .
653 * / \ -> / \
654 * a A b c C d D e [more] a A b B c d D e [more]
655 */
656 node234 *sib = n->kids[ki+1];
657 int j;
658 sub->elems[1] = n->elems[ki];
659 sub->kids[2] = sib->kids[0];
660 sub->counts[2] = sib->counts[0];
661 if (sub->kids[2]) sub->kids[2]->parent = sub;
662 n->elems[ki] = sib->elems[0];
663 sib->kids[0] = sib->kids[1];
664 sib->counts[0] = sib->counts[1];
665 for (j = 0; j < 2 && sib->elems[j+1]; j++) {
666 sib->kids[j+1] = sib->kids[j+2];
667 sib->counts[j+1] = sib->counts[j+2];
668 sib->elems[j] = sib->elems[j+1];
669 }
670 sib->kids[j+1] = NULL;
671 sib->counts[j+1] = 0;
672 sib->elems[j] = NULL;
673 n->counts[ki] = countnode234(sub);
674 n->counts[ki+1] = countnode234(sib);
675 LOG((" case 3a right\n"));
676 } else {
677 /*
678 * Case 3b. ki has only one element, and has no
679 * neighbour with more than one. So pick a
680 * neighbour and merge it with ki, taking an
681 * element down from n to go in the middle.
682 *
683 * . B . .
684 * / \ -> |
685 * a A b c C d a A b B c C d
686 *
687 * (Since at all points we have avoided
688 * descending to a node with only one element,
689 * we can be sure that n is not reduced to
690 * nothingness by this move, _unless_ it was
691 * the very first node, ie the root of the
692 * tree. In that case we remove the now-empty
693 * root and replace it with its single large
694 * child as shown.)
695 */
696 node234 *sib;
697 int j;
698
699 if (ki > 0) {
700 ki--;
701 index += n->counts[ki] + 1;
702 }
703 sib = n->kids[ki];
704 sub = n->kids[ki+1];
705
706 sub->kids[3] = sub->kids[1];
707 sub->counts[3] = sub->counts[1];
708 sub->elems[2] = sub->elems[0];
709 sub->kids[2] = sub->kids[0];
710 sub->counts[2] = sub->counts[0];
711 sub->elems[1] = n->elems[ki];
712 sub->kids[1] = sib->kids[1];
713 sub->counts[1] = sib->counts[1];
714 if (sub->kids[1]) sub->kids[1]->parent = sub;
715 sub->elems[0] = sib->elems[0];
716 sub->kids[0] = sib->kids[0];
717 sub->counts[0] = sib->counts[0];
718 if (sub->kids[0]) sub->kids[0]->parent = sub;
719
720 n->counts[ki+1] = countnode234(sub);
721
722 sfree(sib);
723
724 /*
725 * That's built the big node in sub. Now we
726 * need to remove the reference to sib in n.
727 */
728 for (j = ki; j < 3 && n->kids[j+1]; j++) {
729 n->kids[j] = n->kids[j+1];
730 n->counts[j] = n->counts[j+1];
731 n->elems[j] = j<2 ? n->elems[j+1] : NULL;
732 }
733 n->kids[j] = NULL;
734 n->counts[j] = 0;
735 if (j < 3) n->elems[j] = NULL;
736 LOG((" case 3b ki=%d\n", ki));
737
738 if (!n->elems[0]) {
739 /*
740 * The root is empty and needs to be
741 * removed.
742 */
743 LOG((" shifting root!\n"));
744 t->root = sub;
745 sub->parent = NULL;
746 sfree(n);
747 }
748 }
749 }
750 n = sub;
751 }
752 if (!retval)
753 retval = n->elems[ei];
754
755 if (ei==-1)
756 return NULL; /* although this shouldn't happen */
757
758 /*
759 * Treat special case: this is the one remaining item in
760 * the tree. n is the tree root (no parent), has one
761 * element (no elems[1]), and has no kids (no kids[0]).
762 */
763 if (!n->parent && !n->elems[1] && !n->kids[0]) {
764 LOG((" removed last element in tree\n"));
765 sfree(n);
766 t->root = NULL;
767 return retval;
768 }
769
770 /*
771 * Now we have the element we want, as n->elems[ei], and we
772 * have also arranged for that element not to be the only
773 * one in its node. So...
774 */
775
776 if (!n->kids[0] && n->elems[1]) {
777 /*
778 * Case 1. n is a leaf node with more than one element,
779 * so it's _really easy_. Just delete the thing and
780 * we're done.
781 */
782 int i;
783 LOG((" case 1\n"));
784 for (i = ei; i < 2 && n->elems[i+1]; i++)
785 n->elems[i] = n->elems[i+1];
786 n->elems[i] = NULL;
787 /*
788 * Having done that to the leaf node, we now go back up
789 * the tree fixing the counts.
790 */
791 while (n->parent) {
792 int childnum;
793 childnum = (n->parent->kids[0] == n ? 0 :
794 n->parent->kids[1] == n ? 1 :
795 n->parent->kids[2] == n ? 2 : 3);
796 n->parent->counts[childnum]--;
797 n = n->parent;
798 }
799 return retval; /* finished! */
800 } else if (n->kids[ei]->elems[1]) {
801 /*
802 * Case 2a. n is an internal node, and the root of the
803 * subtree to the left of e has more than one element.
804 * So find the predecessor p to e (ie the largest node
805 * in that subtree), place it where e currently is, and
806 * then start the deletion process over again on the
807 * subtree with p as target.
808 */
809 node234 *m = n->kids[ei];
810 void *target;
811 LOG((" case 2a\n"));
812 while (m->kids[0]) {
813 m = (m->kids[3] ? m->kids[3] :
814 m->kids[2] ? m->kids[2] :
815 m->kids[1] ? m->kids[1] : m->kids[0]);
816 }
817 target = (m->elems[2] ? m->elems[2] :
818 m->elems[1] ? m->elems[1] : m->elems[0]);
819 n->elems[ei] = target;
820 index = n->counts[ei]-1;
821 n = n->kids[ei];
822 } else if (n->kids[ei+1]->elems[1]) {
823 /*
824 * Case 2b, symmetric to 2a but s/left/right/ and
825 * s/predecessor/successor/. (And s/largest/smallest/).
826 */
827 node234 *m = n->kids[ei+1];
828 void *target;
829 LOG((" case 2b\n"));
830 while (m->kids[0]) {
831 m = m->kids[0];
832 }
833 target = m->elems[0];
834 n->elems[ei] = target;
835 n = n->kids[ei+1];
836 index = 0;
837 } else {
838 /*
839 * Case 2c. n is an internal node, and the subtrees to
840 * the left and right of e both have only one element.
841 * So combine the two subnodes into a single big node
842 * with their own elements on the left and right and e
843 * in the middle, then restart the deletion process on
844 * that subtree, with e still as target.
845 */
846 node234 *a = n->kids[ei], *b = n->kids[ei+1];
847 int j;
848
849 LOG((" case 2c\n"));
850 a->elems[1] = n->elems[ei];
851 a->kids[2] = b->kids[0];
852 a->counts[2] = b->counts[0];
853 if (a->kids[2]) a->kids[2]->parent = a;
854 a->elems[2] = b->elems[0];
855 a->kids[3] = b->kids[1];
856 a->counts[3] = b->counts[1];
857 if (a->kids[3]) a->kids[3]->parent = a;
858 sfree(b);
859 n->counts[ei] = countnode234(a);
860 /*
861 * That's built the big node in a, and destroyed b. Now
862 * remove the reference to b (and e) in n.
863 */
864 for (j = ei; j < 2 && n->elems[j+1]; j++) {
865 n->elems[j] = n->elems[j+1];
866 n->kids[j+1] = n->kids[j+2];
867 n->counts[j+1] = n->counts[j+2];
868 }
869 n->elems[j] = NULL;
870 n->kids[j+1] = NULL;
871 n->counts[j+1] = 0;
872 /*
873 * It's possible, in this case, that we've just removed
874 * the only element in the root of the tree. If so,
875 * shift the root.
876 */
877 if (n->elems[0] == NULL) {
878 LOG((" shifting root!\n"));
879 t->root = a;
880 a->parent = NULL;
881 sfree(n);
882 }
883 /*
884 * Now go round the deletion process again, with n
885 * pointing at the new big node and e still the same.
886 */
887 n = a;
888 index = a->counts[0] + a->counts[1] + 1;
889 }
890 }
891 }
892 void *delpos234(tree234 *t, int index) {
893 if (index < 0 || index >= countnode234(t->root))
894 return NULL;
895 return delpos234_internal(t, index);
896 }
897 void *del234(tree234 *t, void *e) {
898 int index;
899 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
900 return NULL; /* it wasn't in there anyway */
901 return delpos234_internal(t, index); /* it's there; delete it. */
902 }
903
904 #ifdef TEST
905
906 /*
907 * Test code for the 2-3-4 tree. This code maintains an alternative
908 * representation of the data in the tree, in an array (using the
909 * obvious and slow insert and delete functions). After each tree
910 * operation, the verify() function is called, which ensures all
911 * the tree properties are preserved:
912 * - node->child->parent always equals node
913 * - tree->root->parent always equals NULL
914 * - number of kids == 0 or number of elements + 1;
915 * - tree has the same depth everywhere
916 * - every node has at least one element
917 * - subtree element counts are accurate
918 * - any NULL kid pointer is accompanied by a zero count
919 * - in a sorted tree: ordering property between elements of a
920 * node and elements of its children is preserved
921 * and also ensures the list represented by the tree is the same
922 * list it should be. (This last check also doubly verifies the
923 * ordering properties, because the `same list it should be' is by
924 * definition correctly ordered. It also ensures all nodes are
925 * distinct, because the enum functions would get caught in a loop
926 * if not.)
927 */
928
929 #include <stdarg.h>
930
931 #define srealloc realloc
932
933 /*
934 * Error reporting function.
935 */
936 void error(char *fmt, ...) {
937 va_list ap;
938 printf("ERROR: ");
939 va_start(ap, fmt);
940 vfprintf(stdout, fmt, ap);
941 va_end(ap);
942 printf("\n");
943 }
944
945 /* The array representation of the data. */
946 void **array;
947 int arraylen, arraysize;
948 cmpfn234 cmp;
949
950 /* The tree representation of the same data. */
951 tree234 *tree;
952
953 typedef struct {
954 int treedepth;
955 int elemcount;
956 } chkctx;
957
958 int chknode(chkctx *ctx, int level, node234 *node,
959 void *lowbound, void *highbound) {
960 int nkids, nelems;
961 int i;
962 int count;
963
964 /* Count the non-NULL kids. */
965 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
966 /* Ensure no kids beyond the first NULL are non-NULL. */
967 for (i = nkids; i < 4; i++)
968 if (node->kids[i]) {
969 error("node %p: nkids=%d but kids[%d] non-NULL",
970 node, nkids, i);
971 } else if (node->counts[i]) {
972 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
973 node, i, i, node->counts[i]);
974 }
975
976 /* Count the non-NULL elements. */
977 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
978 /* Ensure no elements beyond the first NULL are non-NULL. */
979 for (i = nelems; i < 3; i++)
980 if (node->elems[i]) {
981 error("node %p: nelems=%d but elems[%d] non-NULL",
982 node, nelems, i);
983 }
984
985 if (nkids == 0) {
986 /*
987 * If nkids==0, this is a leaf node; verify that the tree
988 * depth is the same everywhere.
989 */
990 if (ctx->treedepth < 0)
991 ctx->treedepth = level; /* we didn't know the depth yet */
992 else if (ctx->treedepth != level)
993 error("node %p: leaf at depth %d, previously seen depth %d",
994 node, level, ctx->treedepth);
995 } else {
996 /*
997 * If nkids != 0, then it should be nelems+1, unless nelems
998 * is 0 in which case nkids should also be 0 (and so we
999 * shouldn't be in this condition at all).
1000 */
1001 int shouldkids = (nelems ? nelems+1 : 0);
1002 if (nkids != shouldkids) {
1003 error("node %p: %d elems should mean %d kids but has %d",
1004 node, nelems, shouldkids, nkids);
1005 }
1006 }
1007
1008 /*
1009 * nelems should be at least 1.
1010 */
1011 if (nelems == 0) {
1012 error("node %p: no elems", node, nkids);
1013 }
1014
1015 /*
1016 * Add nelems to the running element count of the whole tree.
1017 */
1018 ctx->elemcount += nelems;
1019
1020 /*
1021 * Check ordering property: all elements should be strictly >
1022 * lowbound, strictly < highbound, and strictly < each other in
1023 * sequence. (lowbound and highbound are NULL at edges of tree
1024 * - both NULL at root node - and NULL is considered to be <
1025 * everything and > everything. IYSWIM.)
1026 */
1027 if (cmp) {
1028 for (i = -1; i < nelems; i++) {
1029 void *lower = (i == -1 ? lowbound : node->elems[i]);
1030 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1031 if (lower && higher && cmp(lower, higher) >= 0) {
1032 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1033 node, i, lower, i+1, higher);
1034 }
1035 }
1036 }
1037
1038 /*
1039 * Check parent pointers: all non-NULL kids should have a
1040 * parent pointer coming back to this node.
1041 */
1042 for (i = 0; i < nkids; i++)
1043 if (node->kids[i]->parent != node) {
1044 error("node %p kid %d: parent ptr is %p not %p",
1045 node, i, node->kids[i]->parent, node);
1046 }
1047
1048
1049 /*
1050 * Now (finally!) recurse into subtrees.
1051 */
1052 count = nelems;
1053
1054 for (i = 0; i < nkids; i++) {
1055 void *lower = (i == 0 ? lowbound : node->elems[i-1]);
1056 void *higher = (i >= nelems ? highbound : node->elems[i]);
1057 int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
1058 if (node->counts[i] != subcount) {
1059 error("node %p kid %d: count says %d, subtree really has %d",
1060 node, i, node->counts[i], subcount);
1061 }
1062 count += subcount;
1063 }
1064
1065 return count;
1066 }
1067
1068 void verify(void) {
1069 chkctx ctx;
1070 int i;
1071 void *p;
1072
1073 ctx.treedepth = -1; /* depth unknown yet */
1074 ctx.elemcount = 0; /* no elements seen yet */
1075 /*
1076 * Verify validity of tree properties.
1077 */
1078 if (tree->root) {
1079 if (tree->root->parent != NULL)
1080 error("root->parent is %p should be null", tree->root->parent);
1081 chknode(&ctx, 0, tree->root, NULL, NULL);
1082 }
1083 printf("tree depth: %d\n", ctx.treedepth);
1084 /*
1085 * Enumerate the tree and ensure it matches up to the array.
1086 */
1087 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1088 if (i >= arraylen)
1089 error("tree contains more than %d elements", arraylen);
1090 if (array[i] != p)
1091 error("enum at position %d: array says %s, tree says %s",
1092 i, array[i], p);
1093 }
1094 if (ctx.elemcount != i) {
1095 error("tree really contains %d elements, enum gave %d",
1096 ctx.elemcount, i);
1097 }
1098 if (i < arraylen) {
1099 error("enum gave only %d elements, array has %d", i, arraylen);
1100 }
1101 i = count234(tree);
1102 if (ctx.elemcount != i) {
1103 error("tree really contains %d elements, count234 gave %d",
1104 ctx.elemcount, i);
1105 }
1106 }
1107
1108 void internal_addtest(void *elem, int index, void *realret) {
1109 int i, j;
1110 void *retval;
1111
1112 if (arraysize < arraylen+1) {
1113 arraysize = arraylen+1+256;
1114 array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
1115 srealloc(array, arraysize*sizeof(*array)));
1116 }
1117
1118 i = index;
1119 /* now i points to the first element >= elem */
1120 retval = elem; /* expect elem returned (success) */
1121 for (j = arraylen; j > i; j--)
1122 array[j] = array[j-1];
1123 array[i] = elem; /* add elem to array */
1124 arraylen++;
1125
1126 if (realret != retval) {
1127 error("add: retval was %p expected %p", realret, retval);
1128 }
1129
1130 verify();
1131 }
1132
1133 void addtest(void *elem) {
1134 int i;
1135 void *realret;
1136
1137 realret = add234(tree, elem);
1138
1139 i = 0;
1140 while (i < arraylen && cmp(elem, array[i]) > 0)
1141 i++;
1142 if (i < arraylen && !cmp(elem, array[i])) {
1143 void *retval = array[i]; /* expect that returned not elem */
1144 if (realret != retval) {
1145 error("add: retval was %p expected %p", realret, retval);
1146 }
1147 } else
1148 internal_addtest(elem, i, realret);
1149 }
1150
1151 void addpostest(void *elem, int i) {
1152 void *realret;
1153
1154 realret = addpos234(tree, elem, i);
1155
1156 internal_addtest(elem, i, realret);
1157 }
1158
1159 void delpostest(int i) {
1160 int index = i;
1161 void *elem = array[i], *ret;
1162
1163 /* i points to the right element */
1164 while (i < arraylen-1) {
1165 array[i] = array[i+1];
1166 i++;
1167 }
1168 arraylen--; /* delete elem from array */
1169
1170 if (tree->cmp)
1171 ret = del234(tree, elem);
1172 else
1173 ret = delpos234(tree, index);
1174
1175 if (ret != elem) {
1176 error("del returned %p, expected %p", ret, elem);
1177 }
1178
1179 verify();
1180 }
1181
1182 void deltest(void *elem) {
1183 int i;
1184
1185 i = 0;
1186 while (i < arraylen && cmp(elem, array[i]) > 0)
1187 i++;
1188 if (i >= arraylen || cmp(elem, array[i]) != 0)
1189 return; /* don't do it! */
1190 delpostest(i);
1191 }
1192
1193 /* A sample data set and test utility. Designed for pseudo-randomness,
1194 * and yet repeatability. */
1195
1196 /*
1197 * This random number generator uses the `portable implementation'
1198 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1199 * change it if not.
1200 */
1201 int randomnumber(unsigned *seed) {
1202 *seed *= 1103515245;
1203 *seed += 12345;
1204 return ((*seed) / 65536) % 32768;
1205 }
1206
1207 int mycmp(void *av, void *bv) {
1208 char const *a = (char const *)av;
1209 char const *b = (char const *)bv;
1210 return strcmp(a, b);
1211 }
1212
1213 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1214
1215 char *strings[] = {
1216 "a", "ab", "absque", "coram", "de",
1217 "palam", "clam", "cum", "ex", "e",
1218 "sine", "tenus", "pro", "prae",
1219 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1220 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1221 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1222 "murfl", "spoo", "breen", "flarn", "octothorpe",
1223 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1224 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1225 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1226 "wand", "ring", "amulet"
1227 };
1228
1229 #define NSTR lenof(strings)
1230
1231 int findtest(void) {
1232 const static int rels[] = {
1233 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1234 };
1235 const static char *const relnames[] = {
1236 "EQ", "GE", "LE", "LT", "GT"
1237 };
1238 int i, j, rel, index;
1239 char *p, *ret, *realret, *realret2;
1240 int lo, hi, mid, c;
1241
1242 for (i = 0; i < NSTR; i++) {
1243 p = strings[i];
1244 for (j = 0; j < sizeof(rels)/sizeof(*rels); j++) {
1245 rel = rels[j];
1246
1247 lo = 0; hi = arraylen-1;
1248 while (lo <= hi) {
1249 mid = (lo + hi) / 2;
1250 c = strcmp(p, array[mid]);
1251 if (c < 0)
1252 hi = mid-1;
1253 else if (c > 0)
1254 lo = mid+1;
1255 else
1256 break;
1257 }
1258
1259 if (c == 0) {
1260 if (rel == REL234_LT)
1261 ret = (mid > 0 ? array[--mid] : NULL);
1262 else if (rel == REL234_GT)
1263 ret = (mid < arraylen-1 ? array[++mid] : NULL);
1264 else
1265 ret = array[mid];
1266 } else {
1267 assert(lo == hi+1);
1268 if (rel == REL234_LT || rel == REL234_LE) {
1269 mid = hi;
1270 ret = (hi >= 0 ? array[hi] : NULL);
1271 } else if (rel == REL234_GT || rel == REL234_GE) {
1272 mid = lo;
1273 ret = (lo < arraylen ? array[lo] : NULL);
1274 } else
1275 ret = NULL;
1276 }
1277
1278 realret = findrelpos234(tree, p, NULL, rel, &index);
1279 if (realret != ret) {
1280 error("find(\"%s\",%s) gave %s should be %s",
1281 p, relnames[j], realret, ret);
1282 }
1283 if (realret && index != mid) {
1284 error("find(\"%s\",%s) gave %d should be %d",
1285 p, relnames[j], index, mid);
1286 }
1287 if (realret && rel == REL234_EQ) {
1288 realret2 = index234(tree, index);
1289 if (realret2 != realret) {
1290 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1291 p, relnames[j], realret, index, index, realret2);
1292 }
1293 }
1294 #if 0
1295 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1296 realret, index);
1297 #endif
1298 }
1299 }
1300
1301 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1302 if (arraylen && (realret != array[0] || index != 0)) {
1303 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1304 realret, index, array[0]);
1305 } else if (!arraylen && (realret != NULL)) {
1306 error("find(NULL,GT) gave %s(%d) should be NULL",
1307 realret, index);
1308 }
1309
1310 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1311 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
1312 error("find(NULL,LT) gave %s(%d) should be %s(0)",
1313 realret, index, array[arraylen-1]);
1314 } else if (!arraylen && (realret != NULL)) {
1315 error("find(NULL,LT) gave %s(%d) should be NULL",
1316 realret, index);
1317 }
1318 }
1319
1320 int main(void) {
1321 int in[NSTR];
1322 int i, j, k;
1323 unsigned seed = 0;
1324
1325 for (i = 0; i < NSTR; i++) in[i] = 0;
1326 array = NULL;
1327 arraylen = arraysize = 0;
1328 tree = newtree234(mycmp);
1329 cmp = mycmp;
1330
1331 verify();
1332 for (i = 0; i < 10000; i++) {
1333 j = randomnumber(&seed);
1334 j %= NSTR;
1335 printf("trial: %d\n", i);
1336 if (in[j]) {
1337 printf("deleting %s (%d)\n", strings[j], j);
1338 deltest(strings[j]);
1339 in[j] = 0;
1340 } else {
1341 printf("adding %s (%d)\n", strings[j], j);
1342 addtest(strings[j]);
1343 in[j] = 1;
1344 }
1345 findtest();
1346 }
1347
1348 while (arraylen > 0) {
1349 j = randomnumber(&seed);
1350 j %= arraylen;
1351 deltest(array[j]);
1352 }
1353
1354 freetree234(tree);
1355
1356 /*
1357 * Now try an unsorted tree. We don't really need to test
1358 * delpos234 because we know del234 is based on it, so it's
1359 * already been tested in the above sorted-tree code; but for
1360 * completeness we'll use it to tear down our unsorted tree
1361 * once we've built it.
1362 */
1363 tree = newtree234(NULL);
1364 cmp = NULL;
1365 verify();
1366 for (i = 0; i < 1000; i++) {
1367 printf("trial: %d\n", i);
1368 j = randomnumber(&seed);
1369 j %= NSTR;
1370 k = randomnumber(&seed);
1371 k %= count234(tree)+1;
1372 printf("adding string %s at index %d\n", strings[j], k);
1373 addpostest(strings[j], k);
1374 }
1375 while (count234(tree) > 0) {
1376 printf("cleanup: tree size %d\n", count234(tree));
1377 j = randomnumber(&seed);
1378 j %= count234(tree);
1379 printf("deleting string %s from index %d\n", array[j], j);
1380 delpostest(j);
1381 }
1382
1383 return 0;
1384 }
1385
1386 #endif