2 * tree234.c: reasonably generic 2-3-4 tree routines. Currently
3 * supports insert, delete, find and iterate operations.
11 #define mknew(typ) ( (typ *) malloc (sizeof (typ)) )
15 #define LOG(x) (printf x)
32 * Create a 2-3-4 tree.
34 tree234
*newtree234(cmpfn234 cmp
) {
35 tree234
*ret
= mknew(tree234
);
36 LOG(("created tree %p\n", ret
));
43 * Free a 2-3-4 tree (not including freeing the elements).
45 static void freenode234(node234
*n
) {
48 freenode234(n
->kids
[0]);
49 freenode234(n
->kids
[1]);
50 freenode234(n
->kids
[2]);
51 freenode234(n
->kids
[3]);
54 void freetree234(tree234
*t
) {
60 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
61 * an existing element compares equal, returns that.
63 void *add234(tree234
*t
, void *e
) {
64 node234
*n
, **np
, *left
, *right
;
68 LOG(("adding node %p to tree %p\n", e
, t
));
69 if (t
->root
== NULL
) {
70 t
->root
= mknew(node234
);
71 t
->root
->elems
[1] = t
->root
->elems
[2] = NULL
;
72 t
->root
->kids
[0] = t
->root
->kids
[1] = NULL
;
73 t
->root
->kids
[2] = t
->root
->kids
[3] = NULL
;
74 t
->root
->parent
= NULL
;
75 t
->root
->elems
[0] = e
;
76 LOG((" created root %p\n", t
->root
));
83 LOG((" node %p: %p [%p] %p [%p] %p [%p] %p\n",
84 n
, n
->kids
[0], n
->elems
[0], n
->kids
[1], n
->elems
[1],
85 n
->kids
[2], n
->elems
[2], n
->kids
[3]));
86 if ((c
= t
->cmp(e
, n
->elems
[0])) < 0)
89 return n
->elems
[0]; /* already exists */
90 else if (n
->elems
[1] == NULL
|| (c
= t
->cmp(e
, n
->elems
[1])) < 0)
93 return n
->elems
[1]; /* already exists */
94 else if (n
->elems
[2] == NULL
|| (c
= t
->cmp(e
, n
->elems
[2])) < 0)
97 return n
->elems
[2]; /* already exists */
100 LOG((" moving to child %d (%p)\n", np
- n
->kids
, *np
));
104 * We need to insert the new element in n at position np.
109 LOG((" at %p: %p [%p] %p [%p] %p [%p] %p\n",
110 n
, n
->kids
[0], n
->elems
[0], n
->kids
[1], n
->elems
[1],
111 n
->kids
[2], n
->elems
[2], n
->kids
[3]));
112 LOG((" need to insert %p [%p] %p at position %d\n",
113 left
, e
, right
, np
- n
->kids
));
114 if (n
->elems
[1] == NULL
) {
116 * Insert in a 2-node; simple.
118 if (np
== &n
->kids
[0]) {
119 LOG((" inserting on left of 2-node\n"));
120 n
->kids
[2] = n
->kids
[1];
121 n
->elems
[1] = n
->elems
[0];
125 } else { /* np == &n->kids[1] */
126 LOG((" inserting on right of 2-node\n"));
131 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
132 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
133 if (n
->kids
[2]) n
->kids
[2]->parent
= n
;
136 } else if (n
->elems
[2] == NULL
) {
138 * Insert in a 3-node; simple.
140 if (np
== &n
->kids
[0]) {
141 LOG((" inserting on left of 3-node\n"));
142 n
->kids
[3] = n
->kids
[2];
143 n
->elems
[2] = n
->elems
[1];
144 n
->kids
[2] = n
->kids
[1];
145 n
->elems
[1] = n
->elems
[0];
149 } else if (np
== &n
->kids
[1]) {
150 LOG((" inserting in middle of 3-node\n"));
151 n
->kids
[3] = n
->kids
[2];
152 n
->elems
[2] = n
->elems
[1];
156 } else { /* np == &n->kids[2] */
157 LOG((" inserting on right of 3-node\n"));
162 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
163 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
164 if (n
->kids
[2]) n
->kids
[2]->parent
= n
;
165 if (n
->kids
[3]) n
->kids
[3]->parent
= n
;
169 node234
*m
= mknew(node234
);
170 m
->parent
= n
->parent
;
171 LOG((" splitting a 4-node; created new node %p\n", m
));
173 * Insert in a 4-node; split into a 2-node and a
174 * 3-node, and move focus up a level.
176 * I don't think it matters which way round we put the
177 * 2 and the 3. For simplicity, we'll put the 3 first
180 if (np
== &n
->kids
[0]) {
184 m
->elems
[1] = n
->elems
[0];
185 m
->kids
[2] = n
->kids
[1];
187 n
->kids
[0] = n
->kids
[2];
188 n
->elems
[0] = n
->elems
[2];
189 n
->kids
[1] = n
->kids
[3];
190 } else if (np
== &n
->kids
[1]) {
191 m
->kids
[0] = n
->kids
[0];
192 m
->elems
[0] = n
->elems
[0];
197 n
->kids
[0] = n
->kids
[2];
198 n
->elems
[0] = n
->elems
[2];
199 n
->kids
[1] = n
->kids
[3];
200 } else if (np
== &n
->kids
[2]) {
201 m
->kids
[0] = n
->kids
[0];
202 m
->elems
[0] = n
->elems
[0];
203 m
->kids
[1] = n
->kids
[1];
204 m
->elems
[1] = n
->elems
[1];
208 n
->elems
[0] = n
->elems
[2];
209 n
->kids
[1] = n
->kids
[3];
210 } else { /* np == &n->kids[3] */
211 m
->kids
[0] = n
->kids
[0];
212 m
->elems
[0] = n
->elems
[0];
213 m
->kids
[1] = n
->kids
[1];
214 m
->elems
[1] = n
->elems
[1];
215 m
->kids
[2] = n
->kids
[2];
221 m
->kids
[3] = n
->kids
[3] = n
->kids
[2] = NULL
;
222 m
->elems
[2] = n
->elems
[2] = n
->elems
[1] = NULL
;
223 if (m
->kids
[0]) m
->kids
[0]->parent
= m
;
224 if (m
->kids
[1]) m
->kids
[1]->parent
= m
;
225 if (m
->kids
[2]) m
->kids
[2]->parent
= m
;
226 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
227 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
228 LOG((" left (%p): %p [%p] %p [%p] %p\n", m
,
229 m
->kids
[0], m
->elems
[0],
230 m
->kids
[1], m
->elems
[1],
232 LOG((" right (%p): %p [%p] %p\n", n
,
233 n
->kids
[0], n
->elems
[0],
239 np
= (n
->parent
->kids
[0] == n ?
&n
->parent
->kids
[0] :
240 n
->parent
->kids
[1] == n ?
&n
->parent
->kids
[1] :
241 n
->parent
->kids
[2] == n ?
&n
->parent
->kids
[2] :
242 &n
->parent
->kids
[3]);
247 * If we've come out of here by `break', n will still be
248 * non-NULL and we've finished. If we've come here because n is
249 * NULL, we need to create a new root for the tree because the
250 * old one has just split into two.
253 LOG((" root is overloaded, split into two\n"));
254 t
->root
= mknew(node234
);
255 t
->root
->kids
[0] = left
;
256 t
->root
->elems
[0] = e
;
257 t
->root
->kids
[1] = right
;
258 t
->root
->elems
[1] = NULL
;
259 t
->root
->kids
[2] = NULL
;
260 t
->root
->elems
[2] = NULL
;
261 t
->root
->kids
[3] = NULL
;
262 t
->root
->parent
= NULL
;
263 if (t
->root
->kids
[0]) t
->root
->kids
[0]->parent
= t
->root
;
264 if (t
->root
->kids
[1]) t
->root
->kids
[1]->parent
= t
->root
;
265 LOG((" new root is %p [%p] %p\n",
266 t
->root
->kids
[0], t
->root
->elems
[0], t
->root
->kids
[1]));
273 * Find an element e in a 2-3-4 tree t. Returns NULL if not found.
274 * e is always passed as the first argument to cmp, so cmp can be
275 * an asymmetric function if desired. cmp can also be passed as
276 * NULL, in which case the compare function from the tree proper
279 void *find234(tree234
*t
, void *e
, cmpfn234 cmp
) {
291 if ( (c
= cmp(e
, n
->elems
[0])) < 0)
295 else if (n
->elems
[1] == NULL
|| (c
= cmp(e
, n
->elems
[1])) < 0)
299 else if (n
->elems
[2] == NULL
|| (c
= cmp(e
, n
->elems
[2])) < 0)
308 * We've found our way to the bottom of the tree and we know
309 * where we would insert this node if we wanted to. But it
316 * Delete an element e in a 2-3-4 tree. Does not free the element,
317 * merely removes all links to it from the tree nodes.
319 void del234(tree234
*t
, void *e
) {
324 LOG(("deleting %p from tree %p\n", e
, t
));
331 LOG((" node %p: %p [%p] %p [%p] %p [%p] %p\n",
332 n
, n
->kids
[0], n
->elems
[0], n
->kids
[1], n
->elems
[1],
333 n
->kids
[2], n
->elems
[2], n
->kids
[3]));
334 if ((c
= t
->cmp(e
, n
->elems
[0])) < 0) {
338 } else if (n
->elems
[1] == NULL
|| (c
= t
->cmp(e
, n
->elems
[1])) < 0) {
342 } else if (n
->elems
[2] == NULL
|| (c
= t
->cmp(e
, n
->elems
[2])) < 0) {
350 * Recurse down to subtree ki. If it has only one element,
351 * we have to do some transformation to start with.
353 LOG((" moving to subtree %d\n", ki
));
355 if (!sub
->elems
[1]) {
356 LOG((" subtree has only one element!\n", ki
));
357 if (ki
> 0 && n
->kids
[ki
-1]->elems
[1]) {
359 * Case 3a, left-handed variant. Child ki has
360 * only one element, but child ki-1 has two or
361 * more. So we need to move a subtree from ki-1
366 * [more] a A b B c d D e [more] a A b c C d D e
368 node234
*sib
= n
->kids
[ki
-1];
369 int lastelem
= (sib
->elems
[2] ?
2 :
370 sib
->elems
[1] ?
1 : 0);
371 sub
->kids
[2] = sub
->kids
[1];
372 sub
->elems
[1] = sub
->elems
[0];
373 sub
->kids
[1] = sub
->kids
[0];
374 sub
->elems
[0] = n
->elems
[ki
-1];
375 sub
->kids
[0] = sib
->kids
[lastelem
+1];
376 n
->elems
[ki
-1] = sib
->elems
[lastelem
];
377 sib
->kids
[lastelem
+1] = NULL
;
378 sib
->elems
[lastelem
] = NULL
;
379 LOG((" case 3a left\n"));
380 } else if (ki
< 3 && n
->kids
[ki
+1] &&
381 n
->kids
[ki
+1]->elems
[1]) {
383 * Case 3a, right-handed variant. ki has only
384 * one element but ki+1 has two or more. Move a
385 * subtree from ki+1 to ki.
389 * a A b c C d D e [more] a A b B c d D e [more]
391 node234
*sib
= n
->kids
[ki
+1];
393 sub
->elems
[1] = n
->elems
[ki
];
394 sub
->kids
[2] = sib
->kids
[0];
395 n
->elems
[ki
] = sib
->elems
[0];
396 sib
->kids
[0] = sib
->kids
[1];
397 for (j
= 0; j
< 2 && sib
->elems
[j
+1]; j
++) {
398 sib
->kids
[j
+1] = sib
->kids
[j
+2];
399 sib
->elems
[j
] = sib
->elems
[j
+1];
401 sib
->kids
[j
+1] = NULL
;
402 sib
->elems
[j
] = NULL
;
403 LOG((" case 3a right\n"));
406 * Case 3b. ki has only one element, and has no
407 * neighbour with more than one. So pick a
408 * neighbour and merge it with ki, taking an
409 * element down from n to go in the middle.
413 * a A b c C d a A b B c C d
415 * (Since at all points we have avoided
416 * descending to a node with only one element,
417 * we can be sure that n is not reduced to
418 * nothingness by this move, _unless_ it was
419 * the very first node, ie the root of the
420 * tree. In that case we remove the now-empty
421 * root and replace it with its single large
432 sub
->kids
[3] = sub
->kids
[1];
433 sub
->elems
[2] = sub
->elems
[0];
434 sub
->kids
[2] = sub
->kids
[0];
435 sub
->elems
[1] = n
->elems
[ki
];
436 sub
->kids
[1] = sib
->kids
[1];
437 sub
->elems
[0] = sib
->elems
[0];
438 sub
->kids
[0] = sib
->kids
[0];
443 * That's built the big node in sub. Now we
444 * need to remove the reference to sib in n.
446 for (j
= ki
; j
< 3 && n
->kids
[j
+1]; j
++) {
447 n
->kids
[j
] = n
->kids
[j
+1];
448 n
->elems
[j
] = j
<2 ? n
->elems
[j
+1] : NULL
;
451 if (j
< 3) n
->elems
[j
] = NULL
;
456 * The root is empty and needs to be
459 LOG((" shifting root!\n"));
469 return; /* nothing to do; `already removed' */
472 * Treat special case: this is the one remaining item in
473 * the tree. n is the tree root (no parent), has one
474 * element (no elems[1]), and has no kids (no kids[0]).
476 if (!n
->parent
&& !n
->elems
[1] && !n
->kids
[0]) {
477 LOG((" removed last element in tree\n"));
484 * Now we have the element we want, as n->elems[ei], and we
485 * have also arranged for that element not to be the only
486 * one in its node. So...
489 if (!n
->kids
[0] && n
->elems
[1]) {
491 * Case 1. n is a leaf node with more than one element,
492 * so it's _really easy_. Just delete the thing and
497 for (i
= ei
; i
< 3 && n
->elems
[i
+1]; i
++)
498 n
->elems
[i
] = n
->elems
[i
+1];
500 return; /* finished! */
501 } else if (n
->kids
[ei
]->elems
[1]) {
503 * Case 2a. n is an internal node, and the root of the
504 * subtree to the left of e has more than one element.
505 * So find the predecessor p to e (ie the largest node
506 * in that subtree), place it where e currently is, and
507 * then start the deletion process over again on the
508 * subtree with p as target.
510 node234
*m
= n
->kids
[ei
];
514 m
= (m
->kids
[3] ? m
->kids
[3] :
515 m
->kids
[2] ? m
->kids
[2] :
516 m
->kids
[1] ? m
->kids
[1] : m
->kids
[0]);
518 target
= (m
->elems
[2] ? m
->elems
[2] :
519 m
->elems
[1] ? m
->elems
[1] : m
->elems
[0]);
520 n
->elems
[ei
] = target
;
523 } else if (n
->kids
[ei
+1]->elems
[1]) {
525 * Case 2b, symmetric to 2a but s/left/right/ and
526 * s/predecessor/successor/. (And s/largest/smallest/).
528 node234
*m
= n
->kids
[ei
+1];
534 target
= m
->elems
[0];
535 n
->elems
[ei
] = target
;
540 * Case 2c. n is an internal node, and the subtrees to
541 * the left and right of e both have only one element.
542 * So combine the two subnodes into a single big node
543 * with their own elements on the left and right and e
544 * in the middle, then restart the deletion process on
545 * that subtree, with e still as target.
547 node234
*a
= n
->kids
[ei
], *b
= n
->kids
[ei
+1];
551 a
->elems
[1] = n
->elems
[ei
];
552 a
->kids
[2] = b
->kids
[0];
553 a
->elems
[2] = b
->elems
[0];
554 a
->kids
[3] = b
->kids
[1];
557 * That's built the big node in a, and destroyed b. Now
558 * remove the reference to b (and e) in n.
560 for (j
= ei
; j
< 2 && n
->elems
[j
+1]; j
++) {
561 n
->elems
[j
] = n
->elems
[j
+1];
562 n
->kids
[j
+1] = n
->kids
[j
+2];
567 * Now go round the deletion process again, with n
568 * pointing at the new big node and e still the same.
576 * Iterate over the elements of a tree234, in order.
578 void *first234(tree234
*t
, enum234
*e
) {
579 node234
*n
= t
->root
;
589 void *next234(enum234
*e
) {
590 node234
*n
= e
->node
;
593 if (n
->kids
[pos
+1]) {
602 if (pos
== 0 && n
->elems
[1]) {
608 node234
*nn
= n
->parent
;
610 return NULL
; /* end of tree */
611 pos
= (nn
->kids
[0] == n ?
0 :
612 nn
->kids
[1] == n ?
1 :
613 nn
->kids
[2] == n ?
2 : 3);
615 } while (pos
== 3 || n
->kids
[pos
+1] == NULL
);
619 return n
->elems
[pos
];
624 int pnode(node234
*n
, int level
) {
625 printf("%*s%p\n", level
*4, "", n
);
626 if (n
->kids
[0]) pnode(n
->kids
[0], level
+1);
627 if (n
->elems
[0]) printf("%*s\"%s\"\n", level
*4+4, "", n
->elems
[0]);
628 if (n
->kids
[1]) pnode(n
->kids
[1], level
+1);
629 if (n
->elems
[1]) printf("%*s\"%s\"\n", level
*4+4, "", n
->elems
[1]);
630 if (n
->kids
[2]) pnode(n
->kids
[2], level
+1);
631 if (n
->elems
[2]) printf("%*s\"%s\"\n", level
*4+4, "", n
->elems
[2]);
632 if (n
->kids
[3]) pnode(n
->kids
[3], level
+1);
634 int ptree(tree234
*t
) {
638 printf("empty tree\n");
641 int cmp(void *av
, void *bv
) {
642 char *a
= (char *)av
;
643 char *b
= (char *)bv
;
648 tree234
*t
= newtree234(cmp
);
650 add234(t
, "Richard");
657 add234(t
, "Rabbits");
662 add234(t
, "Invisible");
663 add234(t
, "Vegetables");
666 del234(t
, find234(t
, "Richard", NULL
));
668 del234(t
, find234(t
, "Of", NULL
));
670 del234(t
, find234(t
, "York", NULL
));
672 del234(t
, find234(t
, "Gave", NULL
));
674 del234(t
, find234(t
, "Battle", NULL
));
676 del234(t
, find234(t
, "In", NULL
));
678 del234(t
, find234(t
, "Vain", NULL
));
680 del234(t
, find234(t
, "Rabbits", NULL
));
682 del234(t
, find234(t
, "On", NULL
));
684 del234(t
, find234(t
, "Your", NULL
));
686 del234(t
, find234(t
, "Garden", NULL
));
688 del234(t
, find234(t
, "Bring", NULL
));
690 del234(t
, find234(t
, "Invisible", NULL
));
692 del234(t
, find234(t
, "Vegetables", NULL
));