2 * tree234.c: reasonably generic 2-3-4 tree routines. Currently
3 * supports insert, delete, find and iterate operations.
11 #define mknew(typ) ( (typ *) malloc (sizeof (typ)) )
15 #define LOG(x) (printf x)
32 * Create a 2-3-4 tree.
34 tree234
*newtree234(cmpfn234 cmp
) {
35 tree234
*ret
= mknew(tree234
);
36 LOG(("created tree %p\n", ret
));
43 * Free a 2-3-4 tree (not including freeing the elements).
45 static void freenode234(node234
*n
) {
48 freenode234(n
->kids
[0]);
49 freenode234(n
->kids
[1]);
50 freenode234(n
->kids
[2]);
51 freenode234(n
->kids
[3]);
54 void freetree234(tree234
*t
) {
60 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
61 * an existing element compares equal, returns that.
63 void *add234(tree234
*t
, void *e
) {
64 node234
*n
, **np
, *left
, *right
;
68 LOG(("adding node %p to tree %p\n", e
, t
));
69 if (t
->root
== NULL
) {
70 t
->root
= mknew(node234
);
71 t
->root
->elems
[1] = t
->root
->elems
[2] = NULL
;
72 t
->root
->kids
[0] = t
->root
->kids
[1] = NULL
;
73 t
->root
->kids
[2] = t
->root
->kids
[3] = NULL
;
74 t
->root
->parent
= NULL
;
75 t
->root
->elems
[0] = e
;
76 LOG((" created root %p\n", t
->root
));
83 LOG((" node %p: %p [%p] %p [%p] %p [%p] %p\n",
84 n
, n
->kids
[0], n
->elems
[0], n
->kids
[1], n
->elems
[1],
85 n
->kids
[2], n
->elems
[2], n
->kids
[3]));
86 if ((c
= t
->cmp(e
, n
->elems
[0])) < 0)
89 return n
->elems
[0]; /* already exists */
90 else if (n
->elems
[1] == NULL
|| (c
= t
->cmp(e
, n
->elems
[1])) < 0)
93 return n
->elems
[1]; /* already exists */
94 else if (n
->elems
[2] == NULL
|| (c
= t
->cmp(e
, n
->elems
[2])) < 0)
97 return n
->elems
[2]; /* already exists */
100 LOG((" moving to child %d (%p)\n", np
- n
->kids
, *np
));
104 * We need to insert the new element in n at position np.
109 LOG((" at %p: %p [%p] %p [%p] %p [%p] %p\n",
110 n
, n
->kids
[0], n
->elems
[0], n
->kids
[1], n
->elems
[1],
111 n
->kids
[2], n
->elems
[2], n
->kids
[3]));
112 LOG((" need to insert %p [%p] %p at position %d\n",
113 left
, e
, right
, np
- n
->kids
));
114 if (n
->elems
[1] == NULL
) {
116 * Insert in a 2-node; simple.
118 if (np
== &n
->kids
[0]) {
119 LOG((" inserting on left of 2-node\n"));
120 n
->kids
[2] = n
->kids
[1];
121 n
->elems
[1] = n
->elems
[0];
125 } else { /* np == &n->kids[1] */
126 LOG((" inserting on right of 2-node\n"));
131 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
132 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
133 if (n
->kids
[2]) n
->kids
[2]->parent
= n
;
136 } else if (n
->elems
[2] == NULL
) {
138 * Insert in a 3-node; simple.
140 if (np
== &n
->kids
[0]) {
141 LOG((" inserting on left of 3-node\n"));
142 n
->kids
[3] = n
->kids
[2];
143 n
->elems
[2] = n
->elems
[1];
144 n
->kids
[2] = n
->kids
[1];
145 n
->elems
[1] = n
->elems
[0];
149 } else if (np
== &n
->kids
[1]) {
150 LOG((" inserting in middle of 3-node\n"));
151 n
->kids
[3] = n
->kids
[2];
152 n
->elems
[2] = n
->elems
[1];
156 } else { /* np == &n->kids[2] */
157 LOG((" inserting on right of 3-node\n"));
162 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
163 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
164 if (n
->kids
[2]) n
->kids
[2]->parent
= n
;
165 if (n
->kids
[3]) n
->kids
[3]->parent
= n
;
169 node234
*m
= mknew(node234
);
170 m
->parent
= n
->parent
;
171 LOG((" splitting a 4-node; created new node %p\n", m
));
173 * Insert in a 4-node; split into a 2-node and a
174 * 3-node, and move focus up a level.
176 * I don't think it matters which way round we put the
177 * 2 and the 3. For simplicity, we'll put the 3 first
180 if (np
== &n
->kids
[0]) {
184 m
->elems
[1] = n
->elems
[0];
185 m
->kids
[2] = n
->kids
[1];
187 n
->kids
[0] = n
->kids
[2];
188 n
->elems
[0] = n
->elems
[2];
189 n
->kids
[1] = n
->kids
[3];
190 } else if (np
== &n
->kids
[1]) {
191 m
->kids
[0] = n
->kids
[0];
192 m
->elems
[0] = n
->elems
[0];
197 n
->kids
[0] = n
->kids
[2];
198 n
->elems
[0] = n
->elems
[2];
199 n
->kids
[1] = n
->kids
[3];
200 } else if (np
== &n
->kids
[2]) {
201 m
->kids
[0] = n
->kids
[0];
202 m
->elems
[0] = n
->elems
[0];
203 m
->kids
[1] = n
->kids
[1];
204 m
->elems
[1] = n
->elems
[1];
208 n
->elems
[0] = n
->elems
[2];
209 n
->kids
[1] = n
->kids
[3];
210 } else { /* np == &n->kids[3] */
211 m
->kids
[0] = n
->kids
[0];
212 m
->elems
[0] = n
->elems
[0];
213 m
->kids
[1] = n
->kids
[1];
214 m
->elems
[1] = n
->elems
[1];
215 m
->kids
[2] = n
->kids
[2];
221 m
->kids
[3] = n
->kids
[3] = n
->kids
[2] = NULL
;
222 m
->elems
[2] = n
->elems
[2] = n
->elems
[1] = NULL
;
223 if (m
->kids
[0]) m
->kids
[0]->parent
= m
;
224 if (m
->kids
[1]) m
->kids
[1]->parent
= m
;
225 if (m
->kids
[2]) m
->kids
[2]->parent
= m
;
226 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
227 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
228 LOG((" left (%p): %p [%p] %p [%p] %p\n", m
,
229 m
->kids
[0], m
->elems
[0],
230 m
->kids
[1], m
->elems
[1],
232 LOG((" right (%p): %p [%p] %p\n", n
,
233 n
->kids
[0], n
->elems
[0],
239 np
= (n
->parent
->kids
[0] == n ?
&n
->parent
->kids
[0] :
240 n
->parent
->kids
[1] == n ?
&n
->parent
->kids
[1] :
241 n
->parent
->kids
[2] == n ?
&n
->parent
->kids
[2] :
242 &n
->parent
->kids
[3]);
247 * If we've come out of here by `break', n will still be
248 * non-NULL and we've finished. If we've come here because n is
249 * NULL, we need to create a new root for the tree because the
250 * old one has just split into two.
253 LOG((" root is overloaded, split into two\n"));
254 t
->root
= mknew(node234
);
255 t
->root
->kids
[0] = left
;
256 t
->root
->elems
[0] = e
;
257 t
->root
->kids
[1] = right
;
258 t
->root
->elems
[1] = NULL
;
259 t
->root
->kids
[2] = NULL
;
260 t
->root
->elems
[2] = NULL
;
261 t
->root
->kids
[3] = NULL
;
262 t
->root
->parent
= NULL
;
263 if (t
->root
->kids
[0]) t
->root
->kids
[0]->parent
= t
->root
;
264 if (t
->root
->kids
[1]) t
->root
->kids
[1]->parent
= t
->root
;
265 LOG((" new root is %p [%p] %p\n",
266 t
->root
->kids
[0], t
->root
->elems
[0], t
->root
->kids
[1]));
273 * Find an element e in a 2-3-4 tree t. Returns NULL if not found.
274 * e is always passed as the first argument to cmp, so cmp can be
275 * an asymmetric function if desired. cmp can also be passed as
276 * NULL, in which case the compare function from the tree proper
279 void *find234(tree234
*t
, void *e
, cmpfn234 cmp
) {
291 if ( (c
= cmp(e
, n
->elems
[0])) < 0)
295 else if (n
->elems
[1] == NULL
|| (c
= cmp(e
, n
->elems
[1])) < 0)
299 else if (n
->elems
[2] == NULL
|| (c
= cmp(e
, n
->elems
[2])) < 0)
308 * We've found our way to the bottom of the tree and we know
309 * where we would insert this node if we wanted to. But it
316 * Delete an element e in a 2-3-4 tree. Does not free the element,
317 * merely removes all links to it from the tree nodes.
319 void del234(tree234
*t
, void *e
) {
324 LOG(("deleting %p from tree %p\n", e
, t
));
331 LOG((" node %p: %p [%p] %p [%p] %p [%p] %p\n",
332 n
, n
->kids
[0], n
->elems
[0], n
->kids
[1], n
->elems
[1],
333 n
->kids
[2], n
->elems
[2], n
->kids
[3]));
334 if ((c
= t
->cmp(e
, n
->elems
[0])) < 0) {
338 } else if (n
->elems
[1] == NULL
|| (c
= t
->cmp(e
, n
->elems
[1])) < 0) {
342 } else if (n
->elems
[2] == NULL
|| (c
= t
->cmp(e
, n
->elems
[2])) < 0) {
350 * Recurse down to subtree ki. If it has only one element,
351 * we have to do some transformation to start with.
353 LOG((" moving to subtree %d\n", ki
));
355 if (!sub
->elems
[1]) {
356 LOG((" subtree has only one element!\n", ki
));
357 if (ki
> 0 && n
->kids
[ki
-1]->elems
[1]) {
359 * Case 3a, left-handed variant. Child ki has
360 * only one element, but child ki-1 has two or
361 * more. So we need to move a subtree from ki-1
366 * [more] a A b B c d D e [more] a A b c C d D e
368 node234
*sib
= n
->kids
[ki
-1];
369 int lastelem
= (sib
->elems
[2] ?
2 :
370 sib
->elems
[1] ?
1 : 0);
371 sub
->kids
[2] = sub
->kids
[1];
372 sub
->elems
[1] = sub
->elems
[0];
373 sub
->kids
[1] = sub
->kids
[0];
374 sub
->elems
[0] = n
->elems
[ki
-1];
375 sub
->kids
[0] = sib
->kids
[lastelem
+1];
376 if (sub
->kids
[0]) sub
->kids
[0]->parent
= sub
;
377 n
->elems
[ki
-1] = sib
->elems
[lastelem
];
378 sib
->kids
[lastelem
+1] = NULL
;
379 sib
->elems
[lastelem
] = NULL
;
380 LOG((" case 3a left\n"));
381 } else if (ki
< 3 && n
->kids
[ki
+1] &&
382 n
->kids
[ki
+1]->elems
[1]) {
384 * Case 3a, right-handed variant. ki has only
385 * one element but ki+1 has two or more. Move a
386 * subtree from ki+1 to ki.
390 * a A b c C d D e [more] a A b B c d D e [more]
392 node234
*sib
= n
->kids
[ki
+1];
394 sub
->elems
[1] = n
->elems
[ki
];
395 sub
->kids
[2] = sib
->kids
[0];
396 if (sub
->kids
[2]) sub
->kids
[2]->parent
= sub
;
397 n
->elems
[ki
] = sib
->elems
[0];
398 sib
->kids
[0] = sib
->kids
[1];
399 for (j
= 0; j
< 2 && sib
->elems
[j
+1]; j
++) {
400 sib
->kids
[j
+1] = sib
->kids
[j
+2];
401 sib
->elems
[j
] = sib
->elems
[j
+1];
403 sib
->kids
[j
+1] = NULL
;
404 sib
->elems
[j
] = NULL
;
405 LOG((" case 3a right\n"));
408 * Case 3b. ki has only one element, and has no
409 * neighbour with more than one. So pick a
410 * neighbour and merge it with ki, taking an
411 * element down from n to go in the middle.
415 * a A b c C d a A b B c C d
417 * (Since at all points we have avoided
418 * descending to a node with only one element,
419 * we can be sure that n is not reduced to
420 * nothingness by this move, _unless_ it was
421 * the very first node, ie the root of the
422 * tree. In that case we remove the now-empty
423 * root and replace it with its single large
434 sub
->kids
[3] = sub
->kids
[1];
435 sub
->elems
[2] = sub
->elems
[0];
436 sub
->kids
[2] = sub
->kids
[0];
437 sub
->elems
[1] = n
->elems
[ki
];
438 sub
->kids
[1] = sib
->kids
[1];
439 if (sub
->kids
[1]) sub
->kids
[1]->parent
= sub
;
440 sub
->elems
[0] = sib
->elems
[0];
441 sub
->kids
[0] = sib
->kids
[0];
442 if (sub
->kids
[0]) sub
->kids
[0]->parent
= sub
;
447 * That's built the big node in sub. Now we
448 * need to remove the reference to sib in n.
450 for (j
= ki
; j
< 3 && n
->kids
[j
+1]; j
++) {
451 n
->kids
[j
] = n
->kids
[j
+1];
452 n
->elems
[j
] = j
<2 ? n
->elems
[j
+1] : NULL
;
455 if (j
< 3) n
->elems
[j
] = NULL
;
460 * The root is empty and needs to be
463 LOG((" shifting root!\n"));
473 return; /* nothing to do; `already removed' */
476 * Treat special case: this is the one remaining item in
477 * the tree. n is the tree root (no parent), has one
478 * element (no elems[1]), and has no kids (no kids[0]).
480 if (!n
->parent
&& !n
->elems
[1] && !n
->kids
[0]) {
481 LOG((" removed last element in tree\n"));
488 * Now we have the element we want, as n->elems[ei], and we
489 * have also arranged for that element not to be the only
490 * one in its node. So...
493 if (!n
->kids
[0] && n
->elems
[1]) {
495 * Case 1. n is a leaf node with more than one element,
496 * so it's _really easy_. Just delete the thing and
501 for (i
= ei
; i
< 2 && n
->elems
[i
+1]; i
++)
502 n
->elems
[i
] = n
->elems
[i
+1];
504 return; /* finished! */
505 } else if (n
->kids
[ei
]->elems
[1]) {
507 * Case 2a. n is an internal node, and the root of the
508 * subtree to the left of e has more than one element.
509 * So find the predecessor p to e (ie the largest node
510 * in that subtree), place it where e currently is, and
511 * then start the deletion process over again on the
512 * subtree with p as target.
514 node234
*m
= n
->kids
[ei
];
518 m
= (m
->kids
[3] ? m
->kids
[3] :
519 m
->kids
[2] ? m
->kids
[2] :
520 m
->kids
[1] ? m
->kids
[1] : m
->kids
[0]);
522 target
= (m
->elems
[2] ? m
->elems
[2] :
523 m
->elems
[1] ? m
->elems
[1] : m
->elems
[0]);
524 n
->elems
[ei
] = target
;
527 } else if (n
->kids
[ei
+1]->elems
[1]) {
529 * Case 2b, symmetric to 2a but s/left/right/ and
530 * s/predecessor/successor/. (And s/largest/smallest/).
532 node234
*m
= n
->kids
[ei
+1];
538 target
= m
->elems
[0];
539 n
->elems
[ei
] = target
;
544 * Case 2c. n is an internal node, and the subtrees to
545 * the left and right of e both have only one element.
546 * So combine the two subnodes into a single big node
547 * with their own elements on the left and right and e
548 * in the middle, then restart the deletion process on
549 * that subtree, with e still as target.
551 node234
*a
= n
->kids
[ei
], *b
= n
->kids
[ei
+1];
555 a
->elems
[1] = n
->elems
[ei
];
556 a
->kids
[2] = b
->kids
[0];
557 if (a
->kids
[2]) a
->kids
[2]->parent
= a
;
558 a
->elems
[2] = b
->elems
[0];
559 a
->kids
[3] = b
->kids
[1];
560 if (a
->kids
[3]) a
->kids
[3]->parent
= a
;
563 * That's built the big node in a, and destroyed b. Now
564 * remove the reference to b (and e) in n.
566 for (j
= ei
; j
< 2 && n
->elems
[j
+1]; j
++) {
567 n
->elems
[j
] = n
->elems
[j
+1];
568 n
->kids
[j
+1] = n
->kids
[j
+2];
573 * Now go round the deletion process again, with n
574 * pointing at the new big node and e still the same.
582 * Iterate over the elements of a tree234, in order.
584 void *first234(tree234
*t
, enum234
*e
) {
585 node234
*n
= t
->root
;
595 void *next234(enum234
*e
) {
596 node234
*n
= e
->node
;
599 if (n
->kids
[pos
+1]) {
608 if (pos
< 2 && n
->elems
[pos
+1]) {
610 return n
->elems
[e
->posn
];
614 node234
*nn
= n
->parent
;
616 return NULL
; /* end of tree */
617 pos
= (nn
->kids
[0] == n ?
0 :
618 nn
->kids
[1] == n ?
1 :
619 nn
->kids
[2] == n ?
2 : 3);
621 } while (pos
== 3 || n
->kids
[pos
+1] == NULL
);
625 return n
->elems
[pos
];
630 int pnode(node234
*n
, int level
) {
631 printf("%*s%p\n", level
*4, "", n
);
632 if (n
->kids
[0]) pnode(n
->kids
[0], level
+1);
633 if (n
->elems
[0]) printf("%*s\"%s\"\n", level
*4+4, "", n
->elems
[0]);
634 if (n
->kids
[1]) pnode(n
->kids
[1], level
+1);
635 if (n
->elems
[1]) printf("%*s\"%s\"\n", level
*4+4, "", n
->elems
[1]);
636 if (n
->kids
[2]) pnode(n
->kids
[2], level
+1);
637 if (n
->elems
[2]) printf("%*s\"%s\"\n", level
*4+4, "", n
->elems
[2]);
638 if (n
->kids
[3]) pnode(n
->kids
[3], level
+1);
640 int ptree(tree234
*t
) {
644 printf("empty tree\n");
647 int cmp(void *av
, void *bv
) {
648 char *a
= (char *)av
;
649 char *b
= (char *)bv
;
654 tree234
*t
= newtree234(cmp
);
656 add234(t
, "Richard");
663 add234(t
, "Rabbits");
668 add234(t
, "Invisible");
669 add234(t
, "Vegetables");
672 del234(t
, find234(t
, "Richard", NULL
));
674 del234(t
, find234(t
, "Of", NULL
));
676 del234(t
, find234(t
, "York", NULL
));
678 del234(t
, find234(t
, "Gave", NULL
));
680 del234(t
, find234(t
, "Battle", NULL
));
682 del234(t
, find234(t
, "In", NULL
));
684 del234(t
, find234(t
, "Vain", NULL
));
686 del234(t
, find234(t
, "Rabbits", NULL
));
688 del234(t
, find234(t
, "On", NULL
));
690 del234(t
, find234(t
, "Your", NULL
));
692 del234(t
, find234(t
, "Garden", NULL
));
694 del234(t
, find234(t
, "Bring", NULL
));
696 del234(t
, find234(t
, "Invisible", NULL
));
698 del234(t
, find234(t
, "Vegetables", NULL
));