Remove diagnostics and attempt to work around VC compiler bug :-(
[u/mdw/putty] / tree234.c
1 /*
2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
3 *
4 * This file is copyright 1999-2001 Simon Tatham.
5 *
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
13 * conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
26 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <assert.h>
31
32 #include "tree234.h"
33
34 #define smalloc malloc
35 #define sfree free
36
37 #define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
38
39 #ifdef TEST
40 #define LOG(x) (printf x)
41 #else
42 #define LOG(x)
43 #endif
44
45 typedef struct node234_Tag node234;
46
47 struct tree234_Tag {
48 node234 *root;
49 cmpfn234 cmp;
50 };
51
52 struct node234_Tag {
53 node234 *parent;
54 node234 *kids[4];
55 int counts[4];
56 void *elems[3];
57 };
58
59 /*
60 * Create a 2-3-4 tree.
61 */
62 tree234 *newtree234(cmpfn234 cmp) {
63 tree234 *ret = mknew(tree234);
64 LOG(("created tree %p\n", ret));
65 ret->root = NULL;
66 ret->cmp = cmp;
67 return ret;
68 }
69
70 /*
71 * Free a 2-3-4 tree (not including freeing the elements).
72 */
73 static void freenode234(node234 *n) {
74 if (!n)
75 return;
76 freenode234(n->kids[0]);
77 freenode234(n->kids[1]);
78 freenode234(n->kids[2]);
79 freenode234(n->kids[3]);
80 sfree(n);
81 }
82 void freetree234(tree234 *t) {
83 freenode234(t->root);
84 sfree(t);
85 }
86
87 /*
88 * Internal function to count a node.
89 */
90 static int countnode234(node234 *n) {
91 int count = 0;
92 int i;
93 if (!n)
94 return 0;
95 for (i = 0; i < 4; i++)
96 count += n->counts[i];
97 for (i = 0; i < 3; i++)
98 if (n->elems[i])
99 count++;
100 return count;
101 }
102
103 /*
104 * Count the elements in a tree.
105 */
106 int count234(tree234 *t) {
107 if (t->root)
108 return countnode234(t->root);
109 else
110 return 0;
111 }
112
113 /*
114 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
115 * an existing element compares equal, returns that.
116 */
117 static void *add234_internal(tree234 *t, void *e, int index) {
118 node234 *n, **np, *left, *right;
119 void *orig_e = e;
120 int c, lcount, rcount;
121
122 LOG(("adding node %p to tree %p\n", e, t));
123 if (t->root == NULL) {
124 t->root = mknew(node234);
125 t->root->elems[1] = t->root->elems[2] = NULL;
126 t->root->kids[0] = t->root->kids[1] = NULL;
127 t->root->kids[2] = t->root->kids[3] = NULL;
128 t->root->counts[0] = t->root->counts[1] = 0;
129 t->root->counts[2] = t->root->counts[3] = 0;
130 t->root->parent = NULL;
131 t->root->elems[0] = e;
132 LOG((" created root %p\n", t->root));
133 return orig_e;
134 }
135
136 np = &t->root;
137 while (*np) {
138 int childnum;
139 n = *np;
140 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
141 n,
142 n->kids[0], n->counts[0], n->elems[0],
143 n->kids[1], n->counts[1], n->elems[1],
144 n->kids[2], n->counts[2], n->elems[2],
145 n->kids[3], n->counts[3]));
146 if (index >= 0) {
147 if (!n->kids[0]) {
148 /*
149 * Leaf node. We want to insert at kid position
150 * equal to the index:
151 *
152 * 0 A 1 B 2 C 3
153 */
154 childnum = index;
155 } else {
156 /*
157 * Internal node. We always descend through it (add
158 * always starts at the bottom, never in the
159 * middle).
160 */
161 do { /* this is a do ... while (0) to allow `break' */
162 if (index <= n->counts[0]) {
163 childnum = 0;
164 break;
165 }
166 index -= n->counts[0] + 1;
167 if (index <= n->counts[1]) {
168 childnum = 1;
169 break;
170 }
171 index -= n->counts[1] + 1;
172 if (index <= n->counts[2]) {
173 childnum = 2;
174 break;
175 }
176 index -= n->counts[2] + 1;
177 if (index <= n->counts[3]) {
178 childnum = 3;
179 break;
180 }
181 return NULL; /* error: index out of range */
182 } while (0);
183 }
184 } else {
185 if ((c = t->cmp(e, n->elems[0])) < 0)
186 childnum = 0;
187 else if (c == 0)
188 return n->elems[0]; /* already exists */
189 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
190 childnum = 1;
191 else if (c == 0)
192 return n->elems[1]; /* already exists */
193 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
194 childnum = 2;
195 else if (c == 0)
196 return n->elems[2]; /* already exists */
197 else
198 childnum = 3;
199 }
200 np = &n->kids[childnum];
201 LOG((" moving to child %d (%p)\n", childnum, *np));
202 }
203
204 /*
205 * We need to insert the new element in n at position np.
206 */
207 left = NULL; lcount = 0;
208 right = NULL; rcount = 0;
209 while (n) {
210 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
211 n,
212 n->kids[0], n->counts[0], n->elems[0],
213 n->kids[1], n->counts[1], n->elems[1],
214 n->kids[2], n->counts[2], n->elems[2],
215 n->kids[3], n->counts[3]));
216 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
217 left, lcount, e, right, rcount, np - n->kids));
218 if (n->elems[1] == NULL) {
219 /*
220 * Insert in a 2-node; simple.
221 */
222 if (np == &n->kids[0]) {
223 LOG((" inserting on left of 2-node\n"));
224 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
225 n->elems[1] = n->elems[0];
226 n->kids[1] = right; n->counts[1] = rcount;
227 n->elems[0] = e;
228 n->kids[0] = left; n->counts[0] = lcount;
229 } else { /* np == &n->kids[1] */
230 LOG((" inserting on right of 2-node\n"));
231 n->kids[2] = right; n->counts[2] = rcount;
232 n->elems[1] = e;
233 n->kids[1] = left; n->counts[1] = lcount;
234 }
235 if (n->kids[0]) n->kids[0]->parent = n;
236 if (n->kids[1]) n->kids[1]->parent = n;
237 if (n->kids[2]) n->kids[2]->parent = n;
238 LOG((" done\n"));
239 break;
240 } else if (n->elems[2] == NULL) {
241 /*
242 * Insert in a 3-node; simple.
243 */
244 if (np == &n->kids[0]) {
245 LOG((" inserting on left of 3-node\n"));
246 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
247 n->elems[2] = n->elems[1];
248 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
249 n->elems[1] = n->elems[0];
250 n->kids[1] = right; n->counts[1] = rcount;
251 n->elems[0] = e;
252 n->kids[0] = left; n->counts[0] = lcount;
253 } else if (np == &n->kids[1]) {
254 LOG((" inserting in middle of 3-node\n"));
255 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
256 n->elems[2] = n->elems[1];
257 n->kids[2] = right; n->counts[2] = rcount;
258 n->elems[1] = e;
259 n->kids[1] = left; n->counts[1] = lcount;
260 } else { /* np == &n->kids[2] */
261 LOG((" inserting on right of 3-node\n"));
262 n->kids[3] = right; n->counts[3] = rcount;
263 n->elems[2] = e;
264 n->kids[2] = left; n->counts[2] = lcount;
265 }
266 if (n->kids[0]) n->kids[0]->parent = n;
267 if (n->kids[1]) n->kids[1]->parent = n;
268 if (n->kids[2]) n->kids[2]->parent = n;
269 if (n->kids[3]) n->kids[3]->parent = n;
270 LOG((" done\n"));
271 break;
272 } else {
273 node234 *m = mknew(node234);
274 m->parent = n->parent;
275 LOG((" splitting a 4-node; created new node %p\n", m));
276 /*
277 * Insert in a 4-node; split into a 2-node and a
278 * 3-node, and move focus up a level.
279 *
280 * I don't think it matters which way round we put the
281 * 2 and the 3. For simplicity, we'll put the 3 first
282 * always.
283 */
284 if (np == &n->kids[0]) {
285 m->kids[0] = left; m->counts[0] = lcount;
286 m->elems[0] = e;
287 m->kids[1] = right; m->counts[1] = rcount;
288 m->elems[1] = n->elems[0];
289 m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1];
290 e = n->elems[1];
291 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
292 n->elems[0] = n->elems[2];
293 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
294 } else if (np == &n->kids[1]) {
295 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
296 m->elems[0] = n->elems[0];
297 m->kids[1] = left; m->counts[1] = lcount;
298 m->elems[1] = e;
299 m->kids[2] = right; m->counts[2] = rcount;
300 e = n->elems[1];
301 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
302 n->elems[0] = n->elems[2];
303 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
304 } else if (np == &n->kids[2]) {
305 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
306 m->elems[0] = n->elems[0];
307 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
308 m->elems[1] = n->elems[1];
309 m->kids[2] = left; m->counts[2] = lcount;
310 /* e = e; */
311 n->kids[0] = right; n->counts[0] = rcount;
312 n->elems[0] = n->elems[2];
313 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
314 } else { /* np == &n->kids[3] */
315 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
316 m->elems[0] = n->elems[0];
317 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
318 m->elems[1] = n->elems[1];
319 m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2];
320 n->kids[0] = left; n->counts[0] = lcount;
321 n->elems[0] = e;
322 n->kids[1] = right; n->counts[1] = rcount;
323 e = n->elems[2];
324 }
325 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
326 m->counts[3] = n->counts[3] = n->counts[2] = 0;
327 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
328 if (m->kids[0]) m->kids[0]->parent = m;
329 if (m->kids[1]) m->kids[1]->parent = m;
330 if (m->kids[2]) m->kids[2]->parent = m;
331 if (n->kids[0]) n->kids[0]->parent = n;
332 if (n->kids[1]) n->kids[1]->parent = n;
333 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
334 m->kids[0], m->counts[0], m->elems[0],
335 m->kids[1], m->counts[1], m->elems[1],
336 m->kids[2], m->counts[2]));
337 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
338 n->kids[0], n->counts[0], n->elems[0],
339 n->kids[1], n->counts[1]));
340 left = m; lcount = countnode234(left);
341 right = n; rcount = countnode234(right);
342 }
343 if (n->parent)
344 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
345 n->parent->kids[1] == n ? &n->parent->kids[1] :
346 n->parent->kids[2] == n ? &n->parent->kids[2] :
347 &n->parent->kids[3]);
348 n = n->parent;
349 }
350
351 /*
352 * If we've come out of here by `break', n will still be
353 * non-NULL and all we need to do is go back up the tree
354 * updating counts. If we've come here because n is NULL, we
355 * need to create a new root for the tree because the old one
356 * has just split into two. */
357 if (n) {
358 while (n->parent) {
359 int count = countnode234(n);
360 int childnum;
361 childnum = (n->parent->kids[0] == n ? 0 :
362 n->parent->kids[1] == n ? 1 :
363 n->parent->kids[2] == n ? 2 : 3);
364 n->parent->counts[childnum] = count;
365 n = n->parent;
366 }
367 } else {
368 LOG((" root is overloaded, split into two\n"));
369 t->root = mknew(node234);
370 t->root->kids[0] = left; t->root->counts[0] = lcount;
371 t->root->elems[0] = e;
372 t->root->kids[1] = right; t->root->counts[1] = rcount;
373 t->root->elems[1] = NULL;
374 t->root->kids[2] = NULL; t->root->counts[2] = 0;
375 t->root->elems[2] = NULL;
376 t->root->kids[3] = NULL; t->root->counts[3] = 0;
377 t->root->parent = NULL;
378 if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
379 if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
380 LOG((" new root is %p/%d [%p] %p/%d\n",
381 t->root->kids[0], t->root->counts[0],
382 t->root->elems[0],
383 t->root->kids[1], t->root->counts[1]));
384 }
385
386 return orig_e;
387 }
388
389 void *add234(tree234 *t, void *e) {
390 if (!t->cmp) /* tree is unsorted */
391 return NULL;
392
393 return add234_internal(t, e, -1);
394 }
395 void *addpos234(tree234 *t, void *e, int index) {
396 if (index < 0 || /* index out of range */
397 t->cmp) /* tree is sorted */
398 return NULL; /* return failure */
399
400 return add234_internal(t, e, index); /* this checks the upper bound */
401 }
402
403 /*
404 * Look up the element at a given numeric index in a 2-3-4 tree.
405 * Returns NULL if the index is out of range.
406 */
407 void *index234(tree234 *t, int index) {
408 node234 *n;
409
410 if (!t->root)
411 return NULL; /* tree is empty */
412
413 if (index < 0 || index >= countnode234(t->root))
414 return NULL; /* out of range */
415
416 n = t->root;
417
418 while (n) {
419 if (index < n->counts[0])
420 n = n->kids[0];
421 else if (index -= n->counts[0] + 1, index < 0)
422 return n->elems[0];
423 else if (index < n->counts[1])
424 n = n->kids[1];
425 else if (index -= n->counts[1] + 1, index < 0)
426 return n->elems[1];
427 else if (index < n->counts[2])
428 n = n->kids[2];
429 else if (index -= n->counts[2] + 1, index < 0)
430 return n->elems[2];
431 else
432 n = n->kids[3];
433 }
434
435 /* We shouldn't ever get here. I wonder how we did. */
436 return NULL;
437 }
438
439 /*
440 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
441 * found. e is always passed as the first argument to cmp, so cmp
442 * can be an asymmetric function if desired. cmp can also be passed
443 * as NULL, in which case the compare function from the tree proper
444 * will be used.
445 */
446 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
447 int relation, int *index) {
448 node234 *n;
449 void *ret;
450 int c;
451 int idx, ecount, kcount, cmpret;
452
453 if (t->root == NULL)
454 return NULL;
455
456 if (cmp == NULL)
457 cmp = t->cmp;
458
459 n = t->root;
460 /*
461 * Attempt to find the element itself.
462 */
463 idx = 0;
464 ecount = -1;
465 /*
466 * Prepare a fake `cmp' result if e is NULL.
467 */
468 cmpret = 0;
469 if (e == NULL) {
470 assert(relation == REL234_LT || relation == REL234_GT);
471 if (relation == REL234_LT)
472 cmpret = +1; /* e is a max: always greater */
473 else if (relation == REL234_GT)
474 cmpret = -1; /* e is a min: always smaller */
475 }
476 while (1) {
477 for (kcount = 0; kcount < 4; kcount++) {
478 if (kcount >= 3 || n->elems[kcount] == NULL ||
479 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
480 break;
481 }
482 if (n->kids[kcount]) idx += n->counts[kcount];
483 if (c == 0) {
484 ecount = kcount;
485 break;
486 }
487 idx++;
488 }
489 if (ecount >= 0)
490 break;
491 if (n->kids[kcount])
492 n = n->kids[kcount];
493 else
494 break;
495 }
496
497 if (ecount >= 0) {
498 /*
499 * We have found the element we're looking for. It's
500 * n->elems[ecount], at tree index idx. If our search
501 * relation is EQ, LE or GE we can now go home.
502 */
503 if (relation != REL234_LT && relation != REL234_GT) {
504 if (index) *index = idx;
505 return n->elems[ecount];
506 }
507
508 /*
509 * Otherwise, we'll do an indexed lookup for the previous
510 * or next element. (It would be perfectly possible to
511 * implement these search types in a non-counted tree by
512 * going back up from where we are, but far more fiddly.)
513 */
514 if (relation == REL234_LT)
515 idx--;
516 else
517 idx++;
518 } else {
519 /*
520 * We've found our way to the bottom of the tree and we
521 * know where we would insert this node if we wanted to:
522 * we'd put it in in place of the (empty) subtree
523 * n->kids[kcount], and it would have index idx
524 *
525 * But the actual element isn't there. So if our search
526 * relation is EQ, we're doomed.
527 */
528 if (relation == REL234_EQ)
529 return NULL;
530
531 /*
532 * Otherwise, we must do an index lookup for index idx-1
533 * (if we're going left - LE or LT) or index idx (if we're
534 * going right - GE or GT).
535 */
536 if (relation == REL234_LT || relation == REL234_LE) {
537 idx--;
538 }
539 }
540
541 /*
542 * We know the index of the element we want; just call index234
543 * to do the rest. This will return NULL if the index is out of
544 * bounds, which is exactly what we want.
545 */
546 ret = index234(t, idx);
547 if (ret && index) *index = idx;
548 return ret;
549 }
550 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
551 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
552 }
553 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
554 return findrelpos234(t, e, cmp, relation, NULL);
555 }
556 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
557 return findrelpos234(t, e, cmp, REL234_EQ, index);
558 }
559
560 /*
561 * Delete an element e in a 2-3-4 tree. Does not free the element,
562 * merely removes all links to it from the tree nodes.
563 */
564 static void *delpos234_internal(tree234 *t, int index) {
565 node234 *n;
566 void *retval;
567 int ei = -1;
568
569 retval = 0;
570
571 n = t->root;
572 LOG(("deleting item %d from tree %p\n", index, t));
573 while (1) {
574 while (n) {
575 int ki;
576 node234 *sub;
577
578 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
579 n,
580 n->kids[0], n->counts[0], n->elems[0],
581 n->kids[1], n->counts[1], n->elems[1],
582 n->kids[2], n->counts[2], n->elems[2],
583 n->kids[3], n->counts[3],
584 index));
585 if (index < n->counts[0]) {
586 ki = 0;
587 } else if (index -= n->counts[0]+1, index < 0) {
588 ei = 0; break;
589 } else if (index < n->counts[1]) {
590 ki = 1;
591 } else if (index -= n->counts[1]+1, index < 0) {
592 ei = 1; break;
593 } else if (index < n->counts[2]) {
594 ki = 2;
595 } else if (index -= n->counts[2]+1, index < 0) {
596 ei = 2; break;
597 } else {
598 ki = 3;
599 }
600 /*
601 * Recurse down to subtree ki. If it has only one element,
602 * we have to do some transformation to start with.
603 */
604 LOG((" moving to subtree %d\n", ki));
605 sub = n->kids[ki];
606 if (!sub->elems[1]) {
607 LOG((" subtree has only one element!\n", ki));
608 if (ki > 0 && n->kids[ki-1]->elems[1]) {
609 /*
610 * Case 3a, left-handed variant. Child ki has
611 * only one element, but child ki-1 has two or
612 * more. So we need to move a subtree from ki-1
613 * to ki.
614 *
615 * . C . . B .
616 * / \ -> / \
617 * [more] a A b B c d D e [more] a A b c C d D e
618 */
619 node234 *sib = n->kids[ki-1];
620 int lastelem = (sib->elems[2] ? 2 :
621 sib->elems[1] ? 1 : 0);
622 sub->kids[2] = sub->kids[1];
623 sub->counts[2] = sub->counts[1];
624 sub->elems[1] = sub->elems[0];
625 sub->kids[1] = sub->kids[0];
626 sub->counts[1] = sub->counts[0];
627 sub->elems[0] = n->elems[ki-1];
628 sub->kids[0] = sib->kids[lastelem+1];
629 sub->counts[0] = sib->counts[lastelem+1];
630 if (sub->kids[0]) sub->kids[0]->parent = sub;
631 n->elems[ki-1] = sib->elems[lastelem];
632 sib->kids[lastelem+1] = NULL;
633 sib->counts[lastelem+1] = 0;
634 sib->elems[lastelem] = NULL;
635 n->counts[ki] = countnode234(sub);
636 LOG((" case 3a left\n"));
637 LOG((" index and left subtree count before adjustment: %d, %d\n",
638 index, n->counts[ki-1]));
639 index += n->counts[ki-1];
640 n->counts[ki-1] = countnode234(sib);
641 index -= n->counts[ki-1];
642 LOG((" index and left subtree count after adjustment: %d, %d\n",
643 index, n->counts[ki-1]));
644 } else if (ki < 3 && n->kids[ki+1] &&
645 n->kids[ki+1]->elems[1]) {
646 /*
647 * Case 3a, right-handed variant. ki has only
648 * one element but ki+1 has two or more. Move a
649 * subtree from ki+1 to ki.
650 *
651 * . B . . C .
652 * / \ -> / \
653 * a A b c C d D e [more] a A b B c d D e [more]
654 */
655 node234 *sib = n->kids[ki+1];
656 int j;
657 sub->elems[1] = n->elems[ki];
658 sub->kids[2] = sib->kids[0];
659 sub->counts[2] = sib->counts[0];
660 if (sub->kids[2]) sub->kids[2]->parent = sub;
661 n->elems[ki] = sib->elems[0];
662 sib->kids[0] = sib->kids[1];
663 sib->counts[0] = sib->counts[1];
664 for (j = 0; j < 2 && sib->elems[j+1]; j++) {
665 sib->kids[j+1] = sib->kids[j+2];
666 sib->counts[j+1] = sib->counts[j+2];
667 sib->elems[j] = sib->elems[j+1];
668 }
669 sib->kids[j+1] = NULL;
670 sib->counts[j+1] = 0;
671 sib->elems[j] = NULL;
672 n->counts[ki] = countnode234(sub);
673 n->counts[ki+1] = countnode234(sib);
674 LOG((" case 3a right\n"));
675 } else {
676 /*
677 * Case 3b. ki has only one element, and has no
678 * neighbour with more than one. So pick a
679 * neighbour and merge it with ki, taking an
680 * element down from n to go in the middle.
681 *
682 * . B . .
683 * / \ -> |
684 * a A b c C d a A b B c C d
685 *
686 * (Since at all points we have avoided
687 * descending to a node with only one element,
688 * we can be sure that n is not reduced to
689 * nothingness by this move, _unless_ it was
690 * the very first node, ie the root of the
691 * tree. In that case we remove the now-empty
692 * root and replace it with its single large
693 * child as shown.)
694 */
695 node234 *sib;
696 int j;
697
698 if (ki > 0) {
699 ki--;
700 index += n->counts[ki] + 1;
701 }
702 sib = n->kids[ki];
703 sub = n->kids[ki+1];
704
705 sub->kids[3] = sub->kids[1];
706 sub->counts[3] = sub->counts[1];
707 sub->elems[2] = sub->elems[0];
708 sub->kids[2] = sub->kids[0];
709 sub->counts[2] = sub->counts[0];
710 sub->elems[1] = n->elems[ki];
711 sub->kids[1] = sib->kids[1];
712 sub->counts[1] = sib->counts[1];
713 if (sub->kids[1]) sub->kids[1]->parent = sub;
714 sub->elems[0] = sib->elems[0];
715 sub->kids[0] = sib->kids[0];
716 sub->counts[0] = sib->counts[0];
717 if (sub->kids[0]) sub->kids[0]->parent = sub;
718
719 n->counts[ki+1] = countnode234(sub);
720
721 sfree(sib);
722
723 /*
724 * That's built the big node in sub. Now we
725 * need to remove the reference to sib in n.
726 */
727 for (j = ki; j < 3 && n->kids[j+1]; j++) {
728 n->kids[j] = n->kids[j+1];
729 n->counts[j] = n->counts[j+1];
730 n->elems[j] = j<2 ? n->elems[j+1] : NULL;
731 }
732 n->kids[j] = NULL;
733 n->counts[j] = 0;
734 if (j < 3) n->elems[j] = NULL;
735 LOG((" case 3b ki=%d\n", ki));
736
737 if (!n->elems[0]) {
738 /*
739 * The root is empty and needs to be
740 * removed.
741 */
742 LOG((" shifting root!\n"));
743 t->root = sub;
744 sub->parent = NULL;
745 sfree(n);
746 }
747 }
748 }
749 n = sub;
750 }
751 if (!retval)
752 retval = n->elems[ei];
753
754 if (ei==-1)
755 return NULL; /* although this shouldn't happen */
756
757 /*
758 * Treat special case: this is the one remaining item in
759 * the tree. n is the tree root (no parent), has one
760 * element (no elems[1]), and has no kids (no kids[0]).
761 */
762 if (!n->parent && !n->elems[1] && !n->kids[0]) {
763 LOG((" removed last element in tree\n"));
764 sfree(n);
765 t->root = NULL;
766 return retval;
767 }
768
769 /*
770 * Now we have the element we want, as n->elems[ei], and we
771 * have also arranged for that element not to be the only
772 * one in its node. So...
773 */
774
775 if (!n->kids[0] && n->elems[1]) {
776 /*
777 * Case 1. n is a leaf node with more than one element,
778 * so it's _really easy_. Just delete the thing and
779 * we're done.
780 */
781 int i;
782 LOG((" case 1\n"));
783 for (i = ei; i < 2 && n->elems[i+1]; i++)
784 n->elems[i] = n->elems[i+1];
785 n->elems[i] = NULL;
786 /*
787 * Having done that to the leaf node, we now go back up
788 * the tree fixing the counts.
789 */
790 while (n->parent) {
791 int childnum;
792 childnum = (n->parent->kids[0] == n ? 0 :
793 n->parent->kids[1] == n ? 1 :
794 n->parent->kids[2] == n ? 2 : 3);
795 n->parent->counts[childnum]--;
796 n = n->parent;
797 }
798 return retval; /* finished! */
799 } else if (n->kids[ei]->elems[1]) {
800 /*
801 * Case 2a. n is an internal node, and the root of the
802 * subtree to the left of e has more than one element.
803 * So find the predecessor p to e (ie the largest node
804 * in that subtree), place it where e currently is, and
805 * then start the deletion process over again on the
806 * subtree with p as target.
807 */
808 node234 *m = n->kids[ei];
809 void *target;
810 LOG((" case 2a\n"));
811 while (m->kids[0]) {
812 m = (m->kids[3] ? m->kids[3] :
813 m->kids[2] ? m->kids[2] :
814 m->kids[1] ? m->kids[1] : m->kids[0]);
815 }
816 target = (m->elems[2] ? m->elems[2] :
817 m->elems[1] ? m->elems[1] : m->elems[0]);
818 n->elems[ei] = target;
819 index = n->counts[ei]-1;
820 n = n->kids[ei];
821 } else if (n->kids[ei+1]->elems[1]) {
822 /*
823 * Case 2b, symmetric to 2a but s/left/right/ and
824 * s/predecessor/successor/. (And s/largest/smallest/).
825 */
826 node234 *m = n->kids[ei+1];
827 void *target;
828 LOG((" case 2b\n"));
829 while (m->kids[0]) {
830 m = m->kids[0];
831 }
832 target = m->elems[0];
833 n->elems[ei] = target;
834 n = n->kids[ei+1];
835 index = 0;
836 } else {
837 /*
838 * Case 2c. n is an internal node, and the subtrees to
839 * the left and right of e both have only one element.
840 * So combine the two subnodes into a single big node
841 * with their own elements on the left and right and e
842 * in the middle, then restart the deletion process on
843 * that subtree, with e still as target.
844 */
845 node234 *a = n->kids[ei], *b = n->kids[ei+1];
846 int j;
847
848 LOG((" case 2c\n"));
849 a->elems[1] = n->elems[ei];
850 a->kids[2] = b->kids[0];
851 a->counts[2] = b->counts[0];
852 if (a->kids[2]) a->kids[2]->parent = a;
853 a->elems[2] = b->elems[0];
854 a->kids[3] = b->kids[1];
855 a->counts[3] = b->counts[1];
856 if (a->kids[3]) a->kids[3]->parent = a;
857 sfree(b);
858 n->counts[ei] = countnode234(a);
859 /*
860 * That's built the big node in a, and destroyed b. Now
861 * remove the reference to b (and e) in n.
862 */
863 for (j = ei; j < 2 && n->elems[j+1]; j++) {
864 n->elems[j] = n->elems[j+1];
865 n->kids[j+1] = n->kids[j+2];
866 n->counts[j+1] = n->counts[j+2];
867 }
868 n->elems[j] = NULL;
869 n->kids[j+1] = NULL;
870 n->counts[j+1] = 0;
871 /*
872 * It's possible, in this case, that we've just removed
873 * the only element in the root of the tree. If so,
874 * shift the root.
875 */
876 if (n->elems[0] == NULL) {
877 LOG((" shifting root!\n"));
878 t->root = a;
879 a->parent = NULL;
880 sfree(n);
881 }
882 /*
883 * Now go round the deletion process again, with n
884 * pointing at the new big node and e still the same.
885 */
886 n = a;
887 index = a->counts[0] + a->counts[1] + 1;
888 }
889 }
890 }
891 void *delpos234(tree234 *t, int index) {
892 if (index < 0 || index >= countnode234(t->root))
893 return NULL;
894 return delpos234_internal(t, index);
895 }
896 void *del234(tree234 *t, void *e) {
897 int index;
898 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
899 return NULL; /* it wasn't in there anyway */
900 return delpos234_internal(t, index); /* it's there; delete it. */
901 }
902
903 #ifdef TEST
904
905 /*
906 * Test code for the 2-3-4 tree. This code maintains an alternative
907 * representation of the data in the tree, in an array (using the
908 * obvious and slow insert and delete functions). After each tree
909 * operation, the verify() function is called, which ensures all
910 * the tree properties are preserved:
911 * - node->child->parent always equals node
912 * - tree->root->parent always equals NULL
913 * - number of kids == 0 or number of elements + 1;
914 * - tree has the same depth everywhere
915 * - every node has at least one element
916 * - subtree element counts are accurate
917 * - any NULL kid pointer is accompanied by a zero count
918 * - in a sorted tree: ordering property between elements of a
919 * node and elements of its children is preserved
920 * and also ensures the list represented by the tree is the same
921 * list it should be. (This last check also doubly verifies the
922 * ordering properties, because the `same list it should be' is by
923 * definition correctly ordered. It also ensures all nodes are
924 * distinct, because the enum functions would get caught in a loop
925 * if not.)
926 */
927
928 #include <stdarg.h>
929
930 #define srealloc realloc
931
932 /*
933 * Error reporting function.
934 */
935 void error(char *fmt, ...) {
936 va_list ap;
937 printf("ERROR: ");
938 va_start(ap, fmt);
939 vfprintf(stdout, fmt, ap);
940 va_end(ap);
941 printf("\n");
942 }
943
944 /* The array representation of the data. */
945 void **array;
946 int arraylen, arraysize;
947 cmpfn234 cmp;
948
949 /* The tree representation of the same data. */
950 tree234 *tree;
951
952 typedef struct {
953 int treedepth;
954 int elemcount;
955 } chkctx;
956
957 int chknode(chkctx *ctx, int level, node234 *node,
958 void *lowbound, void *highbound) {
959 int nkids, nelems;
960 int i;
961 int count;
962
963 /* Count the non-NULL kids. */
964 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
965 /* Ensure no kids beyond the first NULL are non-NULL. */
966 for (i = nkids; i < 4; i++)
967 if (node->kids[i]) {
968 error("node %p: nkids=%d but kids[%d] non-NULL",
969 node, nkids, i);
970 } else if (node->counts[i]) {
971 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
972 node, i, i, node->counts[i]);
973 }
974
975 /* Count the non-NULL elements. */
976 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
977 /* Ensure no elements beyond the first NULL are non-NULL. */
978 for (i = nelems; i < 3; i++)
979 if (node->elems[i]) {
980 error("node %p: nelems=%d but elems[%d] non-NULL",
981 node, nelems, i);
982 }
983
984 if (nkids == 0) {
985 /*
986 * If nkids==0, this is a leaf node; verify that the tree
987 * depth is the same everywhere.
988 */
989 if (ctx->treedepth < 0)
990 ctx->treedepth = level; /* we didn't know the depth yet */
991 else if (ctx->treedepth != level)
992 error("node %p: leaf at depth %d, previously seen depth %d",
993 node, level, ctx->treedepth);
994 } else {
995 /*
996 * If nkids != 0, then it should be nelems+1, unless nelems
997 * is 0 in which case nkids should also be 0 (and so we
998 * shouldn't be in this condition at all).
999 */
1000 int shouldkids = (nelems ? nelems+1 : 0);
1001 if (nkids != shouldkids) {
1002 error("node %p: %d elems should mean %d kids but has %d",
1003 node, nelems, shouldkids, nkids);
1004 }
1005 }
1006
1007 /*
1008 * nelems should be at least 1.
1009 */
1010 if (nelems == 0) {
1011 error("node %p: no elems", node, nkids);
1012 }
1013
1014 /*
1015 * Add nelems to the running element count of the whole tree.
1016 */
1017 ctx->elemcount += nelems;
1018
1019 /*
1020 * Check ordering property: all elements should be strictly >
1021 * lowbound, strictly < highbound, and strictly < each other in
1022 * sequence. (lowbound and highbound are NULL at edges of tree
1023 * - both NULL at root node - and NULL is considered to be <
1024 * everything and > everything. IYSWIM.)
1025 */
1026 if (cmp) {
1027 for (i = -1; i < nelems; i++) {
1028 void *lower = (i == -1 ? lowbound : node->elems[i]);
1029 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1030 if (lower && higher && cmp(lower, higher) >= 0) {
1031 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1032 node, i, lower, i+1, higher);
1033 }
1034 }
1035 }
1036
1037 /*
1038 * Check parent pointers: all non-NULL kids should have a
1039 * parent pointer coming back to this node.
1040 */
1041 for (i = 0; i < nkids; i++)
1042 if (node->kids[i]->parent != node) {
1043 error("node %p kid %d: parent ptr is %p not %p",
1044 node, i, node->kids[i]->parent, node);
1045 }
1046
1047
1048 /*
1049 * Now (finally!) recurse into subtrees.
1050 */
1051 count = nelems;
1052
1053 for (i = 0; i < nkids; i++) {
1054 void *lower = (i == 0 ? lowbound : node->elems[i-1]);
1055 void *higher = (i >= nelems ? highbound : node->elems[i]);
1056 int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
1057 if (node->counts[i] != subcount) {
1058 error("node %p kid %d: count says %d, subtree really has %d",
1059 node, i, node->counts[i], subcount);
1060 }
1061 count += subcount;
1062 }
1063
1064 return count;
1065 }
1066
1067 void verify(void) {
1068 chkctx ctx;
1069 int i;
1070 void *p;
1071
1072 ctx.treedepth = -1; /* depth unknown yet */
1073 ctx.elemcount = 0; /* no elements seen yet */
1074 /*
1075 * Verify validity of tree properties.
1076 */
1077 if (tree->root) {
1078 if (tree->root->parent != NULL)
1079 error("root->parent is %p should be null", tree->root->parent);
1080 chknode(&ctx, 0, tree->root, NULL, NULL);
1081 }
1082 printf("tree depth: %d\n", ctx.treedepth);
1083 /*
1084 * Enumerate the tree and ensure it matches up to the array.
1085 */
1086 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1087 if (i >= arraylen)
1088 error("tree contains more than %d elements", arraylen);
1089 if (array[i] != p)
1090 error("enum at position %d: array says %s, tree says %s",
1091 i, array[i], p);
1092 }
1093 if (ctx.elemcount != i) {
1094 error("tree really contains %d elements, enum gave %d",
1095 ctx.elemcount, i);
1096 }
1097 if (i < arraylen) {
1098 error("enum gave only %d elements, array has %d", i, arraylen);
1099 }
1100 i = count234(tree);
1101 if (ctx.elemcount != i) {
1102 error("tree really contains %d elements, count234 gave %d",
1103 ctx.elemcount, i);
1104 }
1105 }
1106
1107 void internal_addtest(void *elem, int index, void *realret) {
1108 int i, j;
1109 void *retval;
1110
1111 if (arraysize < arraylen+1) {
1112 arraysize = arraylen+1+256;
1113 array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
1114 srealloc(array, arraysize*sizeof(*array)));
1115 }
1116
1117 i = index;
1118 /* now i points to the first element >= elem */
1119 retval = elem; /* expect elem returned (success) */
1120 for (j = arraylen; j > i; j--)
1121 array[j] = array[j-1];
1122 array[i] = elem; /* add elem to array */
1123 arraylen++;
1124
1125 if (realret != retval) {
1126 error("add: retval was %p expected %p", realret, retval);
1127 }
1128
1129 verify();
1130 }
1131
1132 void addtest(void *elem) {
1133 int i;
1134 void *realret;
1135
1136 realret = add234(tree, elem);
1137
1138 i = 0;
1139 while (i < arraylen && cmp(elem, array[i]) > 0)
1140 i++;
1141 if (i < arraylen && !cmp(elem, array[i])) {
1142 void *retval = array[i]; /* expect that returned not elem */
1143 if (realret != retval) {
1144 error("add: retval was %p expected %p", realret, retval);
1145 }
1146 } else
1147 internal_addtest(elem, i, realret);
1148 }
1149
1150 void addpostest(void *elem, int i) {
1151 void *realret;
1152
1153 realret = addpos234(tree, elem, i);
1154
1155 internal_addtest(elem, i, realret);
1156 }
1157
1158 void delpostest(int i) {
1159 int index = i;
1160 void *elem = array[i], *ret;
1161
1162 /* i points to the right element */
1163 while (i < arraylen-1) {
1164 array[i] = array[i+1];
1165 i++;
1166 }
1167 arraylen--; /* delete elem from array */
1168
1169 if (tree->cmp)
1170 ret = del234(tree, elem);
1171 else
1172 ret = delpos234(tree, index);
1173
1174 if (ret != elem) {
1175 error("del returned %p, expected %p", ret, elem);
1176 }
1177
1178 verify();
1179 }
1180
1181 void deltest(void *elem) {
1182 int i;
1183
1184 i = 0;
1185 while (i < arraylen && cmp(elem, array[i]) > 0)
1186 i++;
1187 if (i >= arraylen || cmp(elem, array[i]) != 0)
1188 return; /* don't do it! */
1189 delpostest(i);
1190 }
1191
1192 /* A sample data set and test utility. Designed for pseudo-randomness,
1193 * and yet repeatability. */
1194
1195 /*
1196 * This random number generator uses the `portable implementation'
1197 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1198 * change it if not.
1199 */
1200 int randomnumber(unsigned *seed) {
1201 *seed *= 1103515245;
1202 *seed += 12345;
1203 return ((*seed) / 65536) % 32768;
1204 }
1205
1206 int mycmp(void *av, void *bv) {
1207 char const *a = (char const *)av;
1208 char const *b = (char const *)bv;
1209 return strcmp(a, b);
1210 }
1211
1212 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1213
1214 char *strings[] = {
1215 "a", "ab", "absque", "coram", "de",
1216 "palam", "clam", "cum", "ex", "e",
1217 "sine", "tenus", "pro", "prae",
1218 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1219 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1220 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1221 "murfl", "spoo", "breen", "flarn", "octothorpe",
1222 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1223 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1224 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1225 "wand", "ring", "amulet"
1226 };
1227
1228 #define NSTR lenof(strings)
1229
1230 int findtest(void) {
1231 const static int rels[] = {
1232 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1233 };
1234 const static char *const relnames[] = {
1235 "EQ", "GE", "LE", "LT", "GT"
1236 };
1237 int i, j, rel, index;
1238 char *p, *ret, *realret, *realret2;
1239 int lo, hi, mid, c;
1240
1241 for (i = 0; i < NSTR; i++) {
1242 p = strings[i];
1243 for (j = 0; j < sizeof(rels)/sizeof(*rels); j++) {
1244 rel = rels[j];
1245
1246 lo = 0; hi = arraylen-1;
1247 while (lo <= hi) {
1248 mid = (lo + hi) / 2;
1249 c = strcmp(p, array[mid]);
1250 if (c < 0)
1251 hi = mid-1;
1252 else if (c > 0)
1253 lo = mid+1;
1254 else
1255 break;
1256 }
1257
1258 if (c == 0) {
1259 if (rel == REL234_LT)
1260 ret = (mid > 0 ? array[--mid] : NULL);
1261 else if (rel == REL234_GT)
1262 ret = (mid < arraylen-1 ? array[++mid] : NULL);
1263 else
1264 ret = array[mid];
1265 } else {
1266 assert(lo == hi+1);
1267 if (rel == REL234_LT || rel == REL234_LE) {
1268 mid = hi;
1269 ret = (hi >= 0 ? array[hi] : NULL);
1270 } else if (rel == REL234_GT || rel == REL234_GE) {
1271 mid = lo;
1272 ret = (lo < arraylen ? array[lo] : NULL);
1273 } else
1274 ret = NULL;
1275 }
1276
1277 realret = findrelpos234(tree, p, NULL, rel, &index);
1278 if (realret != ret) {
1279 error("find(\"%s\",%s) gave %s should be %s",
1280 p, relnames[j], realret, ret);
1281 }
1282 if (realret && index != mid) {
1283 error("find(\"%s\",%s) gave %d should be %d",
1284 p, relnames[j], index, mid);
1285 }
1286 if (realret && rel == REL234_EQ) {
1287 realret2 = index234(tree, index);
1288 if (realret2 != realret) {
1289 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1290 p, relnames[j], realret, index, index, realret2);
1291 }
1292 }
1293 #if 0
1294 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1295 realret, index);
1296 #endif
1297 }
1298 }
1299
1300 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1301 if (arraylen && (realret != array[0] || index != 0)) {
1302 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1303 realret, index, array[0]);
1304 } else if (!arraylen && (realret != NULL)) {
1305 error("find(NULL,GT) gave %s(%d) should be NULL",
1306 realret, index);
1307 }
1308
1309 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1310 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
1311 error("find(NULL,LT) gave %s(%d) should be %s(0)",
1312 realret, index, array[arraylen-1]);
1313 } else if (!arraylen && (realret != NULL)) {
1314 error("find(NULL,LT) gave %s(%d) should be NULL",
1315 realret, index);
1316 }
1317 }
1318
1319 int main(void) {
1320 int in[NSTR];
1321 int i, j, k;
1322 unsigned seed = 0;
1323
1324 for (i = 0; i < NSTR; i++) in[i] = 0;
1325 array = NULL;
1326 arraylen = arraysize = 0;
1327 tree = newtree234(mycmp);
1328 cmp = mycmp;
1329
1330 verify();
1331 for (i = 0; i < 10000; i++) {
1332 j = randomnumber(&seed);
1333 j %= NSTR;
1334 printf("trial: %d\n", i);
1335 if (in[j]) {
1336 printf("deleting %s (%d)\n", strings[j], j);
1337 deltest(strings[j]);
1338 in[j] = 0;
1339 } else {
1340 printf("adding %s (%d)\n", strings[j], j);
1341 addtest(strings[j]);
1342 in[j] = 1;
1343 }
1344 findtest();
1345 }
1346
1347 while (arraylen > 0) {
1348 j = randomnumber(&seed);
1349 j %= arraylen;
1350 deltest(array[j]);
1351 }
1352
1353 freetree234(tree);
1354
1355 /*
1356 * Now try an unsorted tree. We don't really need to test
1357 * delpos234 because we know del234 is based on it, so it's
1358 * already been tested in the above sorted-tree code; but for
1359 * completeness we'll use it to tear down our unsorted tree
1360 * once we've built it.
1361 */
1362 tree = newtree234(NULL);
1363 cmp = NULL;
1364 verify();
1365 for (i = 0; i < 1000; i++) {
1366 printf("trial: %d\n", i);
1367 j = randomnumber(&seed);
1368 j %= NSTR;
1369 k = randomnumber(&seed);
1370 k %= count234(tree)+1;
1371 printf("adding string %s at index %d\n", strings[j], k);
1372 addpostest(strings[j], k);
1373 }
1374 while (count234(tree) > 0) {
1375 printf("cleanup: tree size %d\n", count234(tree));
1376 j = randomnumber(&seed);
1377 j %= count234(tree);
1378 printf("deleting string %s from index %d\n", array[j], j);
1379 delpostest(j);
1380 }
1381
1382 return 0;
1383 }
1384
1385 #endif