Replace PuTTY's 2-3-4 tree implementation with the shiny new counted
[u/mdw/putty] / tree234.c
1 /*
2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
3 *
4 * This file is copyright 1999-2001 Simon Tatham.
5 *
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
13 * conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
26 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <assert.h>
31
32 #include "tree234.h"
33
34 #define smalloc malloc
35 #define sfree free
36
37 #define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
38
39 #ifdef TEST
40 #define LOG(x) (printf x)
41 #else
42 #define LOG(x)
43 #endif
44
45 typedef struct node234_Tag node234;
46
47 struct tree234_Tag {
48 node234 *root;
49 cmpfn234 cmp;
50 };
51
52 struct node234_Tag {
53 node234 *parent;
54 node234 *kids[4];
55 int counts[4];
56 void *elems[3];
57 };
58
59 /*
60 * Create a 2-3-4 tree.
61 */
62 tree234 *newtree234(cmpfn234 cmp) {
63 tree234 *ret = mknew(tree234);
64 LOG(("created tree %p\n", ret));
65 ret->root = NULL;
66 ret->cmp = cmp;
67 return ret;
68 }
69
70 /*
71 * Free a 2-3-4 tree (not including freeing the elements).
72 */
73 static void freenode234(node234 *n) {
74 if (!n)
75 return;
76 freenode234(n->kids[0]);
77 freenode234(n->kids[1]);
78 freenode234(n->kids[2]);
79 freenode234(n->kids[3]);
80 sfree(n);
81 }
82 void freetree234(tree234 *t) {
83 freenode234(t->root);
84 sfree(t);
85 }
86
87 /*
88 * Internal function to count a node.
89 */
90 static int countnode234(node234 *n) {
91 int count = 0;
92 int i;
93 for (i = 0; i < 4; i++)
94 count += n->counts[i];
95 for (i = 0; i < 3; i++)
96 if (n->elems[i])
97 count++;
98 return count;
99 }
100
101 /*
102 * Count the elements in a tree.
103 */
104 int count234(tree234 *t) {
105 if (t->root)
106 return countnode234(t->root);
107 else
108 return 0;
109 }
110
111 /*
112 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
113 * an existing element compares equal, returns that.
114 */
115 static void *add234_internal(tree234 *t, void *e, int index) {
116 node234 *n, **np, *left, *right;
117 void *orig_e = e;
118 int c, lcount, rcount;
119
120 LOG(("adding node %p to tree %p\n", e, t));
121 if (t->root == NULL) {
122 t->root = mknew(node234);
123 t->root->elems[1] = t->root->elems[2] = NULL;
124 t->root->kids[0] = t->root->kids[1] = NULL;
125 t->root->kids[2] = t->root->kids[3] = NULL;
126 t->root->counts[0] = t->root->counts[1] = 0;
127 t->root->counts[2] = t->root->counts[3] = 0;
128 t->root->parent = NULL;
129 t->root->elems[0] = e;
130 LOG((" created root %p\n", t->root));
131 return orig_e;
132 }
133
134 np = &t->root;
135 while (*np) {
136 int childnum;
137 n = *np;
138 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
139 n,
140 n->kids[0], n->counts[0], n->elems[0],
141 n->kids[1], n->counts[1], n->elems[1],
142 n->kids[2], n->counts[2], n->elems[2],
143 n->kids[3], n->counts[3]));
144 if (index >= 0) {
145 if (!n->kids[0]) {
146 /*
147 * Leaf node. We want to insert at kid position
148 * equal to the index:
149 *
150 * 0 A 1 B 2 C 3
151 */
152 childnum = index;
153 } else {
154 /*
155 * Internal node. We always descend through it (add
156 * always starts at the bottom, never in the
157 * middle).
158 */
159 do { /* this is a do ... while (0) to allow `break' */
160 if (index <= n->counts[0]) {
161 childnum = 0;
162 break;
163 }
164 index -= n->counts[0] + 1;
165 if (index <= n->counts[1]) {
166 childnum = 1;
167 break;
168 }
169 index -= n->counts[1] + 1;
170 if (index <= n->counts[2]) {
171 childnum = 2;
172 break;
173 }
174 index -= n->counts[2] + 1;
175 if (index <= n->counts[3]) {
176 childnum = 3;
177 break;
178 }
179 return NULL; /* error: index out of range */
180 } while (0);
181 }
182 } else {
183 if ((c = t->cmp(e, n->elems[0])) < 0)
184 childnum = 0;
185 else if (c == 0)
186 return n->elems[0]; /* already exists */
187 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
188 childnum = 1;
189 else if (c == 0)
190 return n->elems[1]; /* already exists */
191 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
192 childnum = 2;
193 else if (c == 0)
194 return n->elems[2]; /* already exists */
195 else
196 childnum = 3;
197 }
198 np = &n->kids[childnum];
199 LOG((" moving to child %d (%p)\n", childnum, *np));
200 }
201
202 /*
203 * We need to insert the new element in n at position np.
204 */
205 left = NULL; lcount = 0;
206 right = NULL; rcount = 0;
207 while (n) {
208 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
209 n,
210 n->kids[0], n->counts[0], n->elems[0],
211 n->kids[1], n->counts[1], n->elems[1],
212 n->kids[2], n->counts[2], n->elems[2],
213 n->kids[3], n->counts[3]));
214 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
215 left, lcount, e, right, rcount, np - n->kids));
216 if (n->elems[1] == NULL) {
217 /*
218 * Insert in a 2-node; simple.
219 */
220 if (np == &n->kids[0]) {
221 LOG((" inserting on left of 2-node\n"));
222 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
223 n->elems[1] = n->elems[0];
224 n->kids[1] = right; n->counts[1] = rcount;
225 n->elems[0] = e;
226 n->kids[0] = left; n->counts[0] = lcount;
227 } else { /* np == &n->kids[1] */
228 LOG((" inserting on right of 2-node\n"));
229 n->kids[2] = right; n->counts[2] = rcount;
230 n->elems[1] = e;
231 n->kids[1] = left; n->counts[1] = lcount;
232 }
233 if (n->kids[0]) n->kids[0]->parent = n;
234 if (n->kids[1]) n->kids[1]->parent = n;
235 if (n->kids[2]) n->kids[2]->parent = n;
236 LOG((" done\n"));
237 break;
238 } else if (n->elems[2] == NULL) {
239 /*
240 * Insert in a 3-node; simple.
241 */
242 if (np == &n->kids[0]) {
243 LOG((" inserting on left of 3-node\n"));
244 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
245 n->elems[2] = n->elems[1];
246 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
247 n->elems[1] = n->elems[0];
248 n->kids[1] = right; n->counts[1] = rcount;
249 n->elems[0] = e;
250 n->kids[0] = left; n->counts[0] = lcount;
251 } else if (np == &n->kids[1]) {
252 LOG((" inserting in middle of 3-node\n"));
253 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
254 n->elems[2] = n->elems[1];
255 n->kids[2] = right; n->counts[2] = rcount;
256 n->elems[1] = e;
257 n->kids[1] = left; n->counts[1] = lcount;
258 } else { /* np == &n->kids[2] */
259 LOG((" inserting on right of 3-node\n"));
260 n->kids[3] = right; n->counts[3] = rcount;
261 n->elems[2] = e;
262 n->kids[2] = left; n->counts[2] = lcount;
263 }
264 if (n->kids[0]) n->kids[0]->parent = n;
265 if (n->kids[1]) n->kids[1]->parent = n;
266 if (n->kids[2]) n->kids[2]->parent = n;
267 if (n->kids[3]) n->kids[3]->parent = n;
268 LOG((" done\n"));
269 break;
270 } else {
271 node234 *m = mknew(node234);
272 m->parent = n->parent;
273 LOG((" splitting a 4-node; created new node %p\n", m));
274 /*
275 * Insert in a 4-node; split into a 2-node and a
276 * 3-node, and move focus up a level.
277 *
278 * I don't think it matters which way round we put the
279 * 2 and the 3. For simplicity, we'll put the 3 first
280 * always.
281 */
282 if (np == &n->kids[0]) {
283 m->kids[0] = left; m->counts[0] = lcount;
284 m->elems[0] = e;
285 m->kids[1] = right; m->counts[1] = rcount;
286 m->elems[1] = n->elems[0];
287 m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1];
288 e = n->elems[1];
289 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
290 n->elems[0] = n->elems[2];
291 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
292 } else if (np == &n->kids[1]) {
293 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
294 m->elems[0] = n->elems[0];
295 m->kids[1] = left; m->counts[1] = lcount;
296 m->elems[1] = e;
297 m->kids[2] = right; m->counts[2] = rcount;
298 e = n->elems[1];
299 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
300 n->elems[0] = n->elems[2];
301 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
302 } else if (np == &n->kids[2]) {
303 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
304 m->elems[0] = n->elems[0];
305 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
306 m->elems[1] = n->elems[1];
307 m->kids[2] = left; m->counts[2] = lcount;
308 /* e = e; */
309 n->kids[0] = right; n->counts[0] = rcount;
310 n->elems[0] = n->elems[2];
311 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
312 } else { /* np == &n->kids[3] */
313 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
314 m->elems[0] = n->elems[0];
315 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
316 m->elems[1] = n->elems[1];
317 m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2];
318 n->kids[0] = left; n->counts[0] = lcount;
319 n->elems[0] = e;
320 n->kids[1] = right; n->counts[1] = rcount;
321 e = n->elems[2];
322 }
323 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
324 m->counts[3] = n->counts[3] = n->counts[2] = 0;
325 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
326 if (m->kids[0]) m->kids[0]->parent = m;
327 if (m->kids[1]) m->kids[1]->parent = m;
328 if (m->kids[2]) m->kids[2]->parent = m;
329 if (n->kids[0]) n->kids[0]->parent = n;
330 if (n->kids[1]) n->kids[1]->parent = n;
331 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
332 m->kids[0], m->counts[0], m->elems[0],
333 m->kids[1], m->counts[1], m->elems[1],
334 m->kids[2], m->counts[2]));
335 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
336 n->kids[0], n->counts[0], n->elems[0],
337 n->kids[1], n->counts[1]));
338 left = m; lcount = countnode234(left);
339 right = n; rcount = countnode234(right);
340 }
341 if (n->parent)
342 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
343 n->parent->kids[1] == n ? &n->parent->kids[1] :
344 n->parent->kids[2] == n ? &n->parent->kids[2] :
345 &n->parent->kids[3]);
346 n = n->parent;
347 }
348
349 /*
350 * If we've come out of here by `break', n will still be
351 * non-NULL and all we need to do is go back up the tree
352 * updating counts. If we've come here because n is NULL, we
353 * need to create a new root for the tree because the old one
354 * has just split into two. */
355 if (n) {
356 while (n->parent) {
357 int count = countnode234(n);
358 int childnum;
359 childnum = (n->parent->kids[0] == n ? 0 :
360 n->parent->kids[1] == n ? 1 :
361 n->parent->kids[2] == n ? 2 : 3);
362 n->parent->counts[childnum] = count;
363 n = n->parent;
364 }
365 } else {
366 LOG((" root is overloaded, split into two\n"));
367 t->root = mknew(node234);
368 t->root->kids[0] = left; t->root->counts[0] = lcount;
369 t->root->elems[0] = e;
370 t->root->kids[1] = right; t->root->counts[1] = rcount;
371 t->root->elems[1] = NULL;
372 t->root->kids[2] = NULL; t->root->counts[2] = 0;
373 t->root->elems[2] = NULL;
374 t->root->kids[3] = NULL; t->root->counts[3] = 0;
375 t->root->parent = NULL;
376 if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
377 if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
378 LOG((" new root is %p/%d [%p] %p/%d\n",
379 t->root->kids[0], t->root->counts[0],
380 t->root->elems[0],
381 t->root->kids[1], t->root->counts[1]));
382 }
383
384 return orig_e;
385 }
386
387 void *add234(tree234 *t, void *e) {
388 if (!t->cmp) /* tree is unsorted */
389 return NULL;
390
391 return add234_internal(t, e, -1);
392 }
393 void *addpos234(tree234 *t, void *e, int index) {
394 if (index < 0 || /* index out of range */
395 t->cmp) /* tree is sorted */
396 return NULL; /* return failure */
397
398 return add234_internal(t, e, index); /* this checks the upper bound */
399 }
400
401 /*
402 * Look up the element at a given numeric index in a 2-3-4 tree.
403 * Returns NULL if the index is out of range.
404 */
405 void *index234(tree234 *t, int index) {
406 node234 *n;
407
408 if (!t->root)
409 return NULL; /* tree is empty */
410
411 if (index < 0 || index >= countnode234(t->root))
412 return NULL; /* out of range */
413
414 n = t->root;
415
416 while (n) {
417 if (index < n->counts[0])
418 n = n->kids[0];
419 else if (index -= n->counts[0] + 1, index < 0)
420 return n->elems[0];
421 else if (index < n->counts[1])
422 n = n->kids[1];
423 else if (index -= n->counts[1] + 1, index < 0)
424 return n->elems[1];
425 else if (index < n->counts[2])
426 n = n->kids[2];
427 else if (index -= n->counts[2] + 1, index < 0)
428 return n->elems[2];
429 else
430 n = n->kids[3];
431 }
432
433 /* We shouldn't ever get here. I wonder how we did. */
434 return NULL;
435 }
436
437 /*
438 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
439 * found. e is always passed as the first argument to cmp, so cmp
440 * can be an asymmetric function if desired. cmp can also be passed
441 * as NULL, in which case the compare function from the tree proper
442 * will be used.
443 */
444 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
445 int relation, int *index) {
446 node234 *n;
447 void *ret;
448 int c;
449 int idx, ecount, kcount, cmpret;
450
451 if (t->root == NULL)
452 return NULL;
453
454 if (cmp == NULL)
455 cmp = t->cmp;
456
457 n = t->root;
458 /*
459 * Attempt to find the element itself.
460 */
461 idx = 0;
462 ecount = -1;
463 /*
464 * Prepare a fake `cmp' result if e is NULL.
465 */
466 cmpret = 0;
467 if (e == NULL) {
468 assert(relation == REL234_LT || relation == REL234_GT);
469 if (relation == REL234_LT)
470 cmpret = +1; /* e is a max: always greater */
471 else if (relation == REL234_GT)
472 cmpret = -1; /* e is a min: always smaller */
473 }
474 while (1) {
475 for (kcount = 0; kcount < 4; kcount++) {
476 if (kcount >= 3 || n->elems[kcount] == NULL ||
477 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
478 break;
479 }
480 if (n->kids[kcount]) idx += n->counts[kcount];
481 if (c == 0) {
482 ecount = kcount;
483 break;
484 }
485 idx++;
486 }
487 if (ecount >= 0)
488 break;
489 if (n->kids[kcount])
490 n = n->kids[kcount];
491 else
492 break;
493 }
494
495 if (ecount >= 0) {
496 /*
497 * We have found the element we're looking for. It's
498 * n->elems[ecount], at tree index idx. If our search
499 * relation is EQ, LE or GE we can now go home.
500 */
501 if (relation != REL234_LT && relation != REL234_GT) {
502 if (index) *index = idx;
503 return n->elems[ecount];
504 }
505
506 /*
507 * Otherwise, we'll do an indexed lookup for the previous
508 * or next element. (It would be perfectly possible to
509 * implement these search types in a non-counted tree by
510 * going back up from where we are, but far more fiddly.)
511 */
512 if (relation == REL234_LT)
513 idx--;
514 else
515 idx++;
516 } else {
517 /*
518 * We've found our way to the bottom of the tree and we
519 * know where we would insert this node if we wanted to:
520 * we'd put it in in place of the (empty) subtree
521 * n->kids[kcount], and it would have index idx
522 *
523 * But the actual element isn't there. So if our search
524 * relation is EQ, we're doomed.
525 */
526 if (relation == REL234_EQ)
527 return NULL;
528
529 /*
530 * Otherwise, we must do an index lookup for index idx-1
531 * (if we're going left - LE or LT) or index idx (if we're
532 * going right - GE or GT).
533 */
534 if (relation == REL234_LT || relation == REL234_LE) {
535 idx--;
536 }
537 }
538
539 /*
540 * We know the index of the element we want; just call index234
541 * to do the rest. This will return NULL if the index is out of
542 * bounds, which is exactly what we want.
543 */
544 ret = index234(t, idx);
545 if (ret && index) *index = idx;
546 return ret;
547 }
548 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
549 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
550 }
551 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
552 return findrelpos234(t, e, cmp, relation, NULL);
553 }
554 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
555 return findrelpos234(t, e, cmp, REL234_EQ, index);
556 }
557
558 /*
559 * Delete an element e in a 2-3-4 tree. Does not free the element,
560 * merely removes all links to it from the tree nodes.
561 */
562 static void *delpos234_internal(tree234 *t, int index) {
563 node234 *n;
564 void *retval;
565 int ei = -1;
566
567 retval = 0;
568
569 n = t->root;
570 LOG(("deleting item %d from tree %p\n", index, t));
571 while (1) {
572 while (n) {
573 int c;
574 int ki;
575 node234 *sub;
576
577 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
578 n,
579 n->kids[0], n->counts[0], n->elems[0],
580 n->kids[1], n->counts[1], n->elems[1],
581 n->kids[2], n->counts[2], n->elems[2],
582 n->kids[3], n->counts[3],
583 index));
584 if (index < n->counts[0]) {
585 ki = 0;
586 } else if (index -= n->counts[0]+1, index < 0) {
587 ei = 0; break;
588 } else if (index < n->counts[1]) {
589 ki = 1;
590 } else if (index -= n->counts[1]+1, index < 0) {
591 ei = 1; break;
592 } else if (index < n->counts[2]) {
593 ki = 2;
594 } else if (index -= n->counts[2]+1, index < 0) {
595 ei = 2; break;
596 } else {
597 ki = 3;
598 }
599 /*
600 * Recurse down to subtree ki. If it has only one element,
601 * we have to do some transformation to start with.
602 */
603 LOG((" moving to subtree %d\n", ki));
604 sub = n->kids[ki];
605 if (!sub->elems[1]) {
606 LOG((" subtree has only one element!\n", ki));
607 if (ki > 0 && n->kids[ki-1]->elems[1]) {
608 /*
609 * Case 3a, left-handed variant. Child ki has
610 * only one element, but child ki-1 has two or
611 * more. So we need to move a subtree from ki-1
612 * to ki.
613 *
614 * . C . . B .
615 * / \ -> / \
616 * [more] a A b B c d D e [more] a A b c C d D e
617 */
618 node234 *sib = n->kids[ki-1];
619 int lastelem = (sib->elems[2] ? 2 :
620 sib->elems[1] ? 1 : 0);
621 sub->kids[2] = sub->kids[1];
622 sub->counts[2] = sub->counts[1];
623 sub->elems[1] = sub->elems[0];
624 sub->kids[1] = sub->kids[0];
625 sub->counts[1] = sub->counts[0];
626 sub->elems[0] = n->elems[ki-1];
627 sub->kids[0] = sib->kids[lastelem+1];
628 sub->counts[0] = sib->counts[lastelem+1];
629 if (sub->kids[0]) sub->kids[0]->parent = sub;
630 n->elems[ki-1] = sib->elems[lastelem];
631 sib->kids[lastelem+1] = NULL;
632 sib->counts[lastelem+1] = 0;
633 sib->elems[lastelem] = NULL;
634 n->counts[ki] = countnode234(sub);
635 LOG((" case 3a left\n"));
636 LOG((" index and left subtree count before adjustment: %d, %d\n",
637 index, n->counts[ki-1]));
638 index += n->counts[ki-1];
639 n->counts[ki-1] = countnode234(sib);
640 index -= n->counts[ki-1];
641 LOG((" index and left subtree count after adjustment: %d, %d\n",
642 index, n->counts[ki-1]));
643 } else if (ki < 3 && n->kids[ki+1] &&
644 n->kids[ki+1]->elems[1]) {
645 /*
646 * Case 3a, right-handed variant. ki has only
647 * one element but ki+1 has two or more. Move a
648 * subtree from ki+1 to ki.
649 *
650 * . B . . C .
651 * / \ -> / \
652 * a A b c C d D e [more] a A b B c d D e [more]
653 */
654 node234 *sib = n->kids[ki+1];
655 int j;
656 sub->elems[1] = n->elems[ki];
657 sub->kids[2] = sib->kids[0];
658 sub->counts[2] = sib->counts[0];
659 if (sub->kids[2]) sub->kids[2]->parent = sub;
660 n->elems[ki] = sib->elems[0];
661 sib->kids[0] = sib->kids[1];
662 sib->counts[0] = sib->counts[1];
663 for (j = 0; j < 2 && sib->elems[j+1]; j++) {
664 sib->kids[j+1] = sib->kids[j+2];
665 sib->counts[j+1] = sib->counts[j+2];
666 sib->elems[j] = sib->elems[j+1];
667 }
668 sib->kids[j+1] = NULL;
669 sib->counts[j+1] = 0;
670 sib->elems[j] = NULL;
671 n->counts[ki] = countnode234(sub);
672 n->counts[ki+1] = countnode234(sib);
673 LOG((" case 3a right\n"));
674 } else {
675 /*
676 * Case 3b. ki has only one element, and has no
677 * neighbour with more than one. So pick a
678 * neighbour and merge it with ki, taking an
679 * element down from n to go in the middle.
680 *
681 * . B . .
682 * / \ -> |
683 * a A b c C d a A b B c C d
684 *
685 * (Since at all points we have avoided
686 * descending to a node with only one element,
687 * we can be sure that n is not reduced to
688 * nothingness by this move, _unless_ it was
689 * the very first node, ie the root of the
690 * tree. In that case we remove the now-empty
691 * root and replace it with its single large
692 * child as shown.)
693 */
694 node234 *sib;
695 int j;
696
697 if (ki > 0) {
698 ki--;
699 index += n->counts[ki] + 1;
700 }
701 sib = n->kids[ki];
702 sub = n->kids[ki+1];
703
704 sub->kids[3] = sub->kids[1];
705 sub->counts[3] = sub->counts[1];
706 sub->elems[2] = sub->elems[0];
707 sub->kids[2] = sub->kids[0];
708 sub->counts[2] = sub->counts[0];
709 sub->elems[1] = n->elems[ki];
710 sub->kids[1] = sib->kids[1];
711 sub->counts[1] = sib->counts[1];
712 if (sub->kids[1]) sub->kids[1]->parent = sub;
713 sub->elems[0] = sib->elems[0];
714 sub->kids[0] = sib->kids[0];
715 sub->counts[0] = sib->counts[0];
716 if (sub->kids[0]) sub->kids[0]->parent = sub;
717
718 n->counts[ki+1] = countnode234(sub);
719
720 sfree(sib);
721
722 /*
723 * That's built the big node in sub. Now we
724 * need to remove the reference to sib in n.
725 */
726 for (j = ki; j < 3 && n->kids[j+1]; j++) {
727 n->kids[j] = n->kids[j+1];
728 n->counts[j] = n->counts[j+1];
729 n->elems[j] = j<2 ? n->elems[j+1] : NULL;
730 }
731 n->kids[j] = NULL;
732 n->counts[j] = 0;
733 if (j < 3) n->elems[j] = NULL;
734 LOG((" case 3b ki=%d\n", ki));
735
736 if (!n->elems[0]) {
737 /*
738 * The root is empty and needs to be
739 * removed.
740 */
741 LOG((" shifting root!\n"));
742 t->root = sub;
743 sub->parent = NULL;
744 sfree(n);
745 }
746 }
747 }
748 n = sub;
749 }
750 if (!retval)
751 retval = n->elems[ei];
752
753 if (ei==-1)
754 return NULL; /* although this shouldn't happen */
755
756 /*
757 * Treat special case: this is the one remaining item in
758 * the tree. n is the tree root (no parent), has one
759 * element (no elems[1]), and has no kids (no kids[0]).
760 */
761 if (!n->parent && !n->elems[1] && !n->kids[0]) {
762 LOG((" removed last element in tree\n"));
763 sfree(n);
764 t->root = NULL;
765 return retval;
766 }
767
768 /*
769 * Now we have the element we want, as n->elems[ei], and we
770 * have also arranged for that element not to be the only
771 * one in its node. So...
772 */
773
774 if (!n->kids[0] && n->elems[1]) {
775 /*
776 * Case 1. n is a leaf node with more than one element,
777 * so it's _really easy_. Just delete the thing and
778 * we're done.
779 */
780 int i;
781 LOG((" case 1\n"));
782 for (i = ei; i < 2 && n->elems[i+1]; i++)
783 n->elems[i] = n->elems[i+1];
784 n->elems[i] = NULL;
785 /*
786 * Having done that to the leaf node, we now go back up
787 * the tree fixing the counts.
788 */
789 while (n->parent) {
790 int childnum;
791 childnum = (n->parent->kids[0] == n ? 0 :
792 n->parent->kids[1] == n ? 1 :
793 n->parent->kids[2] == n ? 2 : 3);
794 n->parent->counts[childnum]--;
795 n = n->parent;
796 }
797 return retval; /* finished! */
798 } else if (n->kids[ei]->elems[1]) {
799 /*
800 * Case 2a. n is an internal node, and the root of the
801 * subtree to the left of e has more than one element.
802 * So find the predecessor p to e (ie the largest node
803 * in that subtree), place it where e currently is, and
804 * then start the deletion process over again on the
805 * subtree with p as target.
806 */
807 node234 *m = n->kids[ei];
808 void *target;
809 LOG((" case 2a\n"));
810 while (m->kids[0]) {
811 m = (m->kids[3] ? m->kids[3] :
812 m->kids[2] ? m->kids[2] :
813 m->kids[1] ? m->kids[1] : m->kids[0]);
814 }
815 target = (m->elems[2] ? m->elems[2] :
816 m->elems[1] ? m->elems[1] : m->elems[0]);
817 n->elems[ei] = target;
818 index = n->counts[ei]-1;
819 n = n->kids[ei];
820 } else if (n->kids[ei+1]->elems[1]) {
821 /*
822 * Case 2b, symmetric to 2a but s/left/right/ and
823 * s/predecessor/successor/. (And s/largest/smallest/).
824 */
825 node234 *m = n->kids[ei+1];
826 void *target;
827 LOG((" case 2b\n"));
828 while (m->kids[0]) {
829 m = m->kids[0];
830 }
831 target = m->elems[0];
832 n->elems[ei] = target;
833 n = n->kids[ei+1];
834 index = 0;
835 } else {
836 /*
837 * Case 2c. n is an internal node, and the subtrees to
838 * the left and right of e both have only one element.
839 * So combine the two subnodes into a single big node
840 * with their own elements on the left and right and e
841 * in the middle, then restart the deletion process on
842 * that subtree, with e still as target.
843 */
844 node234 *a = n->kids[ei], *b = n->kids[ei+1];
845 int j;
846
847 LOG((" case 2c\n"));
848 a->elems[1] = n->elems[ei];
849 a->kids[2] = b->kids[0];
850 a->counts[2] = b->counts[0];
851 if (a->kids[2]) a->kids[2]->parent = a;
852 a->elems[2] = b->elems[0];
853 a->kids[3] = b->kids[1];
854 a->counts[3] = b->counts[1];
855 if (a->kids[3]) a->kids[3]->parent = a;
856 sfree(b);
857 n->counts[ei] = countnode234(a);
858 /*
859 * That's built the big node in a, and destroyed b. Now
860 * remove the reference to b (and e) in n.
861 */
862 for (j = ei; j < 2 && n->elems[j+1]; j++) {
863 n->elems[j] = n->elems[j+1];
864 n->kids[j+1] = n->kids[j+2];
865 n->counts[j+1] = n->counts[j+2];
866 }
867 n->elems[j] = NULL;
868 n->kids[j+1] = NULL;
869 n->counts[j+1] = 0;
870 /*
871 * It's possible, in this case, that we've just removed
872 * the only element in the root of the tree. If so,
873 * shift the root.
874 */
875 if (n->elems[0] == NULL) {
876 LOG((" shifting root!\n"));
877 t->root = a;
878 a->parent = NULL;
879 sfree(n);
880 }
881 /*
882 * Now go round the deletion process again, with n
883 * pointing at the new big node and e still the same.
884 */
885 n = a;
886 index = a->counts[0] + a->counts[1] + 1;
887 }
888 }
889 }
890 void *delpos234(tree234 *t, int index) {
891 if (index < 0 || index >= countnode234(t->root))
892 return NULL;
893 return delpos234_internal(t, index);
894 }
895 void *del234(tree234 *t, void *e) {
896 int index;
897 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
898 return NULL; /* it wasn't in there anyway */
899 return delpos234_internal(t, index); /* it's there; delete it. */
900 }
901
902 #ifdef TEST
903
904 /*
905 * Test code for the 2-3-4 tree. This code maintains an alternative
906 * representation of the data in the tree, in an array (using the
907 * obvious and slow insert and delete functions). After each tree
908 * operation, the verify() function is called, which ensures all
909 * the tree properties are preserved:
910 * - node->child->parent always equals node
911 * - tree->root->parent always equals NULL
912 * - number of kids == 0 or number of elements + 1;
913 * - tree has the same depth everywhere
914 * - every node has at least one element
915 * - subtree element counts are accurate
916 * - any NULL kid pointer is accompanied by a zero count
917 * - in a sorted tree: ordering property between elements of a
918 * node and elements of its children is preserved
919 * and also ensures the list represented by the tree is the same
920 * list it should be. (This last check also doubly verifies the
921 * ordering properties, because the `same list it should be' is by
922 * definition correctly ordered. It also ensures all nodes are
923 * distinct, because the enum functions would get caught in a loop
924 * if not.)
925 */
926
927 #include <stdarg.h>
928
929 #define srealloc realloc
930
931 /*
932 * Error reporting function.
933 */
934 void error(char *fmt, ...) {
935 va_list ap;
936 printf("ERROR: ");
937 va_start(ap, fmt);
938 vfprintf(stdout, fmt, ap);
939 va_end(ap);
940 printf("\n");
941 }
942
943 /* The array representation of the data. */
944 void **array;
945 int arraylen, arraysize;
946 cmpfn234 cmp;
947
948 /* The tree representation of the same data. */
949 tree234 *tree;
950
951 typedef struct {
952 int treedepth;
953 int elemcount;
954 } chkctx;
955
956 int chknode(chkctx *ctx, int level, node234 *node,
957 void *lowbound, void *highbound) {
958 int nkids, nelems;
959 int i;
960 int count;
961
962 /* Count the non-NULL kids. */
963 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
964 /* Ensure no kids beyond the first NULL are non-NULL. */
965 for (i = nkids; i < 4; i++)
966 if (node->kids[i]) {
967 error("node %p: nkids=%d but kids[%d] non-NULL",
968 node, nkids, i);
969 } else if (node->counts[i]) {
970 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
971 node, i, i, node->counts[i]);
972 }
973
974 /* Count the non-NULL elements. */
975 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
976 /* Ensure no elements beyond the first NULL are non-NULL. */
977 for (i = nelems; i < 3; i++)
978 if (node->elems[i]) {
979 error("node %p: nelems=%d but elems[%d] non-NULL",
980 node, nelems, i);
981 }
982
983 if (nkids == 0) {
984 /*
985 * If nkids==0, this is a leaf node; verify that the tree
986 * depth is the same everywhere.
987 */
988 if (ctx->treedepth < 0)
989 ctx->treedepth = level; /* we didn't know the depth yet */
990 else if (ctx->treedepth != level)
991 error("node %p: leaf at depth %d, previously seen depth %d",
992 node, level, ctx->treedepth);
993 } else {
994 /*
995 * If nkids != 0, then it should be nelems+1, unless nelems
996 * is 0 in which case nkids should also be 0 (and so we
997 * shouldn't be in this condition at all).
998 */
999 int shouldkids = (nelems ? nelems+1 : 0);
1000 if (nkids != shouldkids) {
1001 error("node %p: %d elems should mean %d kids but has %d",
1002 node, nelems, shouldkids, nkids);
1003 }
1004 }
1005
1006 /*
1007 * nelems should be at least 1.
1008 */
1009 if (nelems == 0) {
1010 error("node %p: no elems", node, nkids);
1011 }
1012
1013 /*
1014 * Add nelems to the running element count of the whole tree.
1015 */
1016 ctx->elemcount += nelems;
1017
1018 /*
1019 * Check ordering property: all elements should be strictly >
1020 * lowbound, strictly < highbound, and strictly < each other in
1021 * sequence. (lowbound and highbound are NULL at edges of tree
1022 * - both NULL at root node - and NULL is considered to be <
1023 * everything and > everything. IYSWIM.)
1024 */
1025 if (cmp) {
1026 for (i = -1; i < nelems; i++) {
1027 void *lower = (i == -1 ? lowbound : node->elems[i]);
1028 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1029 if (lower && higher && cmp(lower, higher) >= 0) {
1030 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1031 node, i, lower, i+1, higher);
1032 }
1033 }
1034 }
1035
1036 /*
1037 * Check parent pointers: all non-NULL kids should have a
1038 * parent pointer coming back to this node.
1039 */
1040 for (i = 0; i < nkids; i++)
1041 if (node->kids[i]->parent != node) {
1042 error("node %p kid %d: parent ptr is %p not %p",
1043 node, i, node->kids[i]->parent, node);
1044 }
1045
1046
1047 /*
1048 * Now (finally!) recurse into subtrees.
1049 */
1050 count = nelems;
1051
1052 for (i = 0; i < nkids; i++) {
1053 void *lower = (i == 0 ? lowbound : node->elems[i-1]);
1054 void *higher = (i >= nelems ? highbound : node->elems[i]);
1055 int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
1056 if (node->counts[i] != subcount) {
1057 error("node %p kid %d: count says %d, subtree really has %d",
1058 node, i, node->counts[i], subcount);
1059 }
1060 count += subcount;
1061 }
1062
1063 return count;
1064 }
1065
1066 void verify(void) {
1067 chkctx ctx;
1068 int i;
1069 void *p;
1070
1071 ctx.treedepth = -1; /* depth unknown yet */
1072 ctx.elemcount = 0; /* no elements seen yet */
1073 /*
1074 * Verify validity of tree properties.
1075 */
1076 if (tree->root) {
1077 if (tree->root->parent != NULL)
1078 error("root->parent is %p should be null", tree->root->parent);
1079 chknode(&ctx, 0, tree->root, NULL, NULL);
1080 }
1081 printf("tree depth: %d\n", ctx.treedepth);
1082 /*
1083 * Enumerate the tree and ensure it matches up to the array.
1084 */
1085 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1086 if (i >= arraylen)
1087 error("tree contains more than %d elements", arraylen);
1088 if (array[i] != p)
1089 error("enum at position %d: array says %s, tree says %s",
1090 i, array[i], p);
1091 }
1092 if (ctx.elemcount != i) {
1093 error("tree really contains %d elements, enum gave %d",
1094 ctx.elemcount, i);
1095 }
1096 if (i < arraylen) {
1097 error("enum gave only %d elements, array has %d", i, arraylen);
1098 }
1099 i = count234(tree);
1100 if (ctx.elemcount != i) {
1101 error("tree really contains %d elements, count234 gave %d",
1102 ctx.elemcount, i);
1103 }
1104 }
1105
1106 void internal_addtest(void *elem, int index, void *realret) {
1107 int i, j;
1108 void *retval;
1109
1110 if (arraysize < arraylen+1) {
1111 arraysize = arraylen+1+256;
1112 array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
1113 srealloc(array, arraysize*sizeof(*array)));
1114 }
1115
1116 i = index;
1117 /* now i points to the first element >= elem */
1118 retval = elem; /* expect elem returned (success) */
1119 for (j = arraylen; j > i; j--)
1120 array[j] = array[j-1];
1121 array[i] = elem; /* add elem to array */
1122 arraylen++;
1123
1124 if (realret != retval) {
1125 error("add: retval was %p expected %p", realret, retval);
1126 }
1127
1128 verify();
1129 }
1130
1131 void addtest(void *elem) {
1132 int i;
1133 void *realret;
1134
1135 realret = add234(tree, elem);
1136
1137 i = 0;
1138 while (i < arraylen && cmp(elem, array[i]) > 0)
1139 i++;
1140 if (i < arraylen && !cmp(elem, array[i])) {
1141 void *retval = array[i]; /* expect that returned not elem */
1142 if (realret != retval) {
1143 error("add: retval was %p expected %p", realret, retval);
1144 }
1145 } else
1146 internal_addtest(elem, i, realret);
1147 }
1148
1149 void addpostest(void *elem, int i) {
1150 void *realret;
1151
1152 realret = addpos234(tree, elem, i);
1153
1154 internal_addtest(elem, i, realret);
1155 }
1156
1157 void delpostest(int i) {
1158 int index = i;
1159 void *elem = array[i], *ret;
1160
1161 /* i points to the right element */
1162 while (i < arraylen-1) {
1163 array[i] = array[i+1];
1164 i++;
1165 }
1166 arraylen--; /* delete elem from array */
1167
1168 if (tree->cmp)
1169 ret = del234(tree, elem);
1170 else
1171 ret = delpos234(tree, index);
1172
1173 if (ret != elem) {
1174 error("del returned %p, expected %p", ret, elem);
1175 }
1176
1177 verify();
1178 }
1179
1180 void deltest(void *elem) {
1181 int i;
1182
1183 i = 0;
1184 while (i < arraylen && cmp(elem, array[i]) > 0)
1185 i++;
1186 if (i >= arraylen || cmp(elem, array[i]) != 0)
1187 return; /* don't do it! */
1188 delpostest(i);
1189 }
1190
1191 /* A sample data set and test utility. Designed for pseudo-randomness,
1192 * and yet repeatability. */
1193
1194 /*
1195 * This random number generator uses the `portable implementation'
1196 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1197 * change it if not.
1198 */
1199 int randomnumber(unsigned *seed) {
1200 *seed *= 1103515245;
1201 *seed += 12345;
1202 return ((*seed) / 65536) % 32768;
1203 }
1204
1205 int mycmp(void *av, void *bv) {
1206 char const *a = (char const *)av;
1207 char const *b = (char const *)bv;
1208 return strcmp(a, b);
1209 }
1210
1211 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1212
1213 char *strings[] = {
1214 "a", "ab", "absque", "coram", "de",
1215 "palam", "clam", "cum", "ex", "e",
1216 "sine", "tenus", "pro", "prae",
1217 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1218 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1219 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1220 "murfl", "spoo", "breen", "flarn", "octothorpe",
1221 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1222 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1223 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1224 "wand", "ring", "amulet"
1225 };
1226
1227 #define NSTR lenof(strings)
1228
1229 int findtest(void) {
1230 const static int rels[] = {
1231 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1232 };
1233 const static char *const relnames[] = {
1234 "EQ", "GE", "LE", "LT", "GT"
1235 };
1236 int i, j, rel, index;
1237 char *p, *ret, *realret, *realret2;
1238 int lo, hi, mid, c;
1239
1240 for (i = 0; i < NSTR; i++) {
1241 p = strings[i];
1242 for (j = 0; j < sizeof(rels)/sizeof(*rels); j++) {
1243 rel = rels[j];
1244
1245 lo = 0; hi = arraylen-1;
1246 while (lo <= hi) {
1247 mid = (lo + hi) / 2;
1248 c = strcmp(p, array[mid]);
1249 if (c < 0)
1250 hi = mid-1;
1251 else if (c > 0)
1252 lo = mid+1;
1253 else
1254 break;
1255 }
1256
1257 if (c == 0) {
1258 if (rel == REL234_LT)
1259 ret = (mid > 0 ? array[--mid] : NULL);
1260 else if (rel == REL234_GT)
1261 ret = (mid < arraylen-1 ? array[++mid] : NULL);
1262 else
1263 ret = array[mid];
1264 } else {
1265 assert(lo == hi+1);
1266 if (rel == REL234_LT || rel == REL234_LE) {
1267 mid = hi;
1268 ret = (hi >= 0 ? array[hi] : NULL);
1269 } else if (rel == REL234_GT || rel == REL234_GE) {
1270 mid = lo;
1271 ret = (lo < arraylen ? array[lo] : NULL);
1272 } else
1273 ret = NULL;
1274 }
1275
1276 realret = findrelpos234(tree, p, NULL, rel, &index);
1277 if (realret != ret) {
1278 error("find(\"%s\",%s) gave %s should be %s",
1279 p, relnames[j], realret, ret);
1280 }
1281 if (realret && index != mid) {
1282 error("find(\"%s\",%s) gave %d should be %d",
1283 p, relnames[j], index, mid);
1284 }
1285 if (realret && rel == REL234_EQ) {
1286 realret2 = index234(tree, index);
1287 if (realret2 != realret) {
1288 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1289 p, relnames[j], realret, index, index, realret2);
1290 }
1291 }
1292 #if 0
1293 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1294 realret, index);
1295 #endif
1296 }
1297 }
1298
1299 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1300 if (arraylen && (realret != array[0] || index != 0)) {
1301 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1302 realret, index, array[0]);
1303 } else if (!arraylen && (realret != NULL)) {
1304 error("find(NULL,GT) gave %s(%d) should be NULL",
1305 realret, index);
1306 }
1307
1308 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1309 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
1310 error("find(NULL,LT) gave %s(%d) should be %s(0)",
1311 realret, index, array[arraylen-1]);
1312 } else if (!arraylen && (realret != NULL)) {
1313 error("find(NULL,LT) gave %s(%d) should be NULL",
1314 realret, index);
1315 }
1316 }
1317
1318 int main(void) {
1319 int in[NSTR];
1320 int i, j, k;
1321 unsigned seed = 0;
1322
1323 for (i = 0; i < NSTR; i++) in[i] = 0;
1324 array = NULL;
1325 arraylen = arraysize = 0;
1326 tree = newtree234(mycmp);
1327 cmp = mycmp;
1328
1329 verify();
1330 for (i = 0; i < 10000; i++) {
1331 j = randomnumber(&seed);
1332 j %= NSTR;
1333 printf("trial: %d\n", i);
1334 if (in[j]) {
1335 printf("deleting %s (%d)\n", strings[j], j);
1336 deltest(strings[j]);
1337 in[j] = 0;
1338 } else {
1339 printf("adding %s (%d)\n", strings[j], j);
1340 addtest(strings[j]);
1341 in[j] = 1;
1342 }
1343 findtest();
1344 }
1345
1346 while (arraylen > 0) {
1347 j = randomnumber(&seed);
1348 j %= arraylen;
1349 deltest(array[j]);
1350 }
1351
1352 freetree234(tree);
1353
1354 /*
1355 * Now try an unsorted tree. We don't really need to test
1356 * delpos234 because we know del234 is based on it, so it's
1357 * already been tested in the above sorted-tree code; but for
1358 * completeness we'll use it to tear down our unsorted tree
1359 * once we've built it.
1360 */
1361 tree = newtree234(NULL);
1362 cmp = NULL;
1363 verify();
1364 for (i = 0; i < 1000; i++) {
1365 printf("trial: %d\n", i);
1366 j = randomnumber(&seed);
1367 j %= NSTR;
1368 k = randomnumber(&seed);
1369 k %= count234(tree)+1;
1370 printf("adding string %s at index %d\n", strings[j], k);
1371 addpostest(strings[j], k);
1372 }
1373 while (count234(tree) > 0) {
1374 printf("cleanup: tree size %d\n", count234(tree));
1375 j = randomnumber(&seed);
1376 j %= count234(tree);
1377 printf("deleting string %s from index %d\n", array[j], j);
1378 delpostest(j);
1379 }
1380
1381 return 0;
1382 }
1383
1384 #endif