2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
4 * This file is copyright 1999-2001 Simon Tatham.
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #define smalloc malloc
37 #define snew(typ) ( (typ *) smalloc (sizeof (typ)) )
40 #define LOG(x) (printf x)
45 typedef struct node234_Tag node234
;
60 * Create a 2-3-4 tree.
62 tree234
*newtree234(cmpfn234 cmp
) {
63 tree234
*ret
= snew(tree234
);
64 LOG(("created tree %p\n", ret
));
71 * Free a 2-3-4 tree (not including freeing the elements).
73 static void freenode234(node234
*n
) {
76 freenode234(n
->kids
[0]);
77 freenode234(n
->kids
[1]);
78 freenode234(n
->kids
[2]);
79 freenode234(n
->kids
[3]);
82 void freetree234(tree234
*t
) {
88 * Internal function to count a node.
90 static int countnode234(node234
*n
) {
95 for (i
= 0; i
< 4; i
++)
96 count
+= n
->counts
[i
];
97 for (i
= 0; i
< 3; i
++)
104 * Count the elements in a tree.
106 int count234(tree234
*t
) {
108 return countnode234(t
->root
);
114 * Propagate a node overflow up a tree until it stops. Returns 0 or
115 * 1, depending on whether the root had to be split or not.
117 static int add234_insert(node234
*left
, void *e
, node234
*right
,
118 node234
**root
, node234
*n
, int ki
) {
121 * We need to insert the new left/element/right set in n at
124 lcount
= countnode234(left
);
125 rcount
= countnode234(right
);
127 LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
129 n
->kids
[0], n
->counts
[0], n
->elems
[0],
130 n
->kids
[1], n
->counts
[1], n
->elems
[1],
131 n
->kids
[2], n
->counts
[2], n
->elems
[2],
132 n
->kids
[3], n
->counts
[3]));
133 LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n",
134 left
, lcount
, e
, right
, rcount
, ki
));
135 if (n
->elems
[1] == NULL
) {
137 * Insert in a 2-node; simple.
140 LOG((" inserting on left of 2-node\n"));
141 n
->kids
[2] = n
->kids
[1]; n
->counts
[2] = n
->counts
[1];
142 n
->elems
[1] = n
->elems
[0];
143 n
->kids
[1] = right
; n
->counts
[1] = rcount
;
145 n
->kids
[0] = left
; n
->counts
[0] = lcount
;
146 } else { /* ki == 1 */
147 LOG((" inserting on right of 2-node\n"));
148 n
->kids
[2] = right
; n
->counts
[2] = rcount
;
150 n
->kids
[1] = left
; n
->counts
[1] = lcount
;
152 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
153 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
154 if (n
->kids
[2]) n
->kids
[2]->parent
= n
;
157 } else if (n
->elems
[2] == NULL
) {
159 * Insert in a 3-node; simple.
162 LOG((" inserting on left of 3-node\n"));
163 n
->kids
[3] = n
->kids
[2]; n
->counts
[3] = n
->counts
[2];
164 n
->elems
[2] = n
->elems
[1];
165 n
->kids
[2] = n
->kids
[1]; n
->counts
[2] = n
->counts
[1];
166 n
->elems
[1] = n
->elems
[0];
167 n
->kids
[1] = right
; n
->counts
[1] = rcount
;
169 n
->kids
[0] = left
; n
->counts
[0] = lcount
;
170 } else if (ki
== 1) {
171 LOG((" inserting in middle of 3-node\n"));
172 n
->kids
[3] = n
->kids
[2]; n
->counts
[3] = n
->counts
[2];
173 n
->elems
[2] = n
->elems
[1];
174 n
->kids
[2] = right
; n
->counts
[2] = rcount
;
176 n
->kids
[1] = left
; n
->counts
[1] = lcount
;
177 } else { /* ki == 2 */
178 LOG((" inserting on right of 3-node\n"));
179 n
->kids
[3] = right
; n
->counts
[3] = rcount
;
181 n
->kids
[2] = left
; n
->counts
[2] = lcount
;
183 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
184 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
185 if (n
->kids
[2]) n
->kids
[2]->parent
= n
;
186 if (n
->kids
[3]) n
->kids
[3]->parent
= n
;
190 node234
*m
= snew(node234
);
191 m
->parent
= n
->parent
;
192 LOG((" splitting a 4-node; created new node %p\n", m
));
194 * Insert in a 4-node; split into a 2-node and a
195 * 3-node, and move focus up a level.
197 * I don't think it matters which way round we put the
198 * 2 and the 3. For simplicity, we'll put the 3 first
202 m
->kids
[0] = left
; m
->counts
[0] = lcount
;
204 m
->kids
[1] = right
; m
->counts
[1] = rcount
;
205 m
->elems
[1] = n
->elems
[0];
206 m
->kids
[2] = n
->kids
[1]; m
->counts
[2] = n
->counts
[1];
208 n
->kids
[0] = n
->kids
[2]; n
->counts
[0] = n
->counts
[2];
209 n
->elems
[0] = n
->elems
[2];
210 n
->kids
[1] = n
->kids
[3]; n
->counts
[1] = n
->counts
[3];
211 } else if (ki
== 1) {
212 m
->kids
[0] = n
->kids
[0]; m
->counts
[0] = n
->counts
[0];
213 m
->elems
[0] = n
->elems
[0];
214 m
->kids
[1] = left
; m
->counts
[1] = lcount
;
216 m
->kids
[2] = right
; m
->counts
[2] = rcount
;
218 n
->kids
[0] = n
->kids
[2]; n
->counts
[0] = n
->counts
[2];
219 n
->elems
[0] = n
->elems
[2];
220 n
->kids
[1] = n
->kids
[3]; n
->counts
[1] = n
->counts
[3];
221 } else if (ki
== 2) {
222 m
->kids
[0] = n
->kids
[0]; m
->counts
[0] = n
->counts
[0];
223 m
->elems
[0] = n
->elems
[0];
224 m
->kids
[1] = n
->kids
[1]; m
->counts
[1] = n
->counts
[1];
225 m
->elems
[1] = n
->elems
[1];
226 m
->kids
[2] = left
; m
->counts
[2] = lcount
;
228 n
->kids
[0] = right
; n
->counts
[0] = rcount
;
229 n
->elems
[0] = n
->elems
[2];
230 n
->kids
[1] = n
->kids
[3]; n
->counts
[1] = n
->counts
[3];
231 } else { /* ki == 3 */
232 m
->kids
[0] = n
->kids
[0]; m
->counts
[0] = n
->counts
[0];
233 m
->elems
[0] = n
->elems
[0];
234 m
->kids
[1] = n
->kids
[1]; m
->counts
[1] = n
->counts
[1];
235 m
->elems
[1] = n
->elems
[1];
236 m
->kids
[2] = n
->kids
[2]; m
->counts
[2] = n
->counts
[2];
237 n
->kids
[0] = left
; n
->counts
[0] = lcount
;
239 n
->kids
[1] = right
; n
->counts
[1] = rcount
;
242 m
->kids
[3] = n
->kids
[3] = n
->kids
[2] = NULL
;
243 m
->counts
[3] = n
->counts
[3] = n
->counts
[2] = 0;
244 m
->elems
[2] = n
->elems
[2] = n
->elems
[1] = NULL
;
245 if (m
->kids
[0]) m
->kids
[0]->parent
= m
;
246 if (m
->kids
[1]) m
->kids
[1]->parent
= m
;
247 if (m
->kids
[2]) m
->kids
[2]->parent
= m
;
248 if (n
->kids
[0]) n
->kids
[0]->parent
= n
;
249 if (n
->kids
[1]) n
->kids
[1]->parent
= n
;
250 LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m
,
251 m
->kids
[0], m
->counts
[0], m
->elems
[0],
252 m
->kids
[1], m
->counts
[1], m
->elems
[1],
253 m
->kids
[2], m
->counts
[2]));
254 LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n
,
255 n
->kids
[0], n
->counts
[0], n
->elems
[0],
256 n
->kids
[1], n
->counts
[1]));
257 left
= m
; lcount
= countnode234(left
);
258 right
= n
; rcount
= countnode234(right
);
261 ki
= (n
->parent
->kids
[0] == n ?
0 :
262 n
->parent
->kids
[1] == n ?
1 :
263 n
->parent
->kids
[2] == n ?
2 : 3);
268 * If we've come out of here by `break', n will still be
269 * non-NULL and all we need to do is go back up the tree
270 * updating counts. If we've come here because n is NULL, we
271 * need to create a new root for the tree because the old one
272 * has just split into two. */
275 int count
= countnode234(n
);
277 childnum
= (n
->parent
->kids
[0] == n ?
0 :
278 n
->parent
->kids
[1] == n ?
1 :
279 n
->parent
->kids
[2] == n ?
2 : 3);
280 n
->parent
->counts
[childnum
] = count
;
283 return 0; /* root unchanged */
285 LOG((" root is overloaded, split into two\n"));
286 (*root
) = snew(node234
);
287 (*root
)->kids
[0] = left
; (*root
)->counts
[0] = lcount
;
288 (*root
)->elems
[0] = e
;
289 (*root
)->kids
[1] = right
; (*root
)->counts
[1] = rcount
;
290 (*root
)->elems
[1] = NULL
;
291 (*root
)->kids
[2] = NULL
; (*root
)->counts
[2] = 0;
292 (*root
)->elems
[2] = NULL
;
293 (*root
)->kids
[3] = NULL
; (*root
)->counts
[3] = 0;
294 (*root
)->parent
= NULL
;
295 if ((*root
)->kids
[0]) (*root
)->kids
[0]->parent
= (*root
);
296 if ((*root
)->kids
[1]) (*root
)->kids
[1]->parent
= (*root
);
297 LOG((" new root is %p/%d \"%s\" %p/%d\n",
298 (*root
)->kids
[0], (*root
)->counts
[0],
300 (*root
)->kids
[1], (*root
)->counts
[1]));
301 return 1; /* root moved */
306 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
307 * an existing element compares equal, returns that.
309 static void *add234_internal(tree234
*t
, void *e
, int index
) {
315 LOG(("adding element \"%s\" to tree %p\n", e
, t
));
316 if (t
->root
== NULL
) {
317 t
->root
= snew(node234
);
318 t
->root
->elems
[1] = t
->root
->elems
[2] = NULL
;
319 t
->root
->kids
[0] = t
->root
->kids
[1] = NULL
;
320 t
->root
->kids
[2] = t
->root
->kids
[3] = NULL
;
321 t
->root
->counts
[0] = t
->root
->counts
[1] = 0;
322 t
->root
->counts
[2] = t
->root
->counts
[3] = 0;
323 t
->root
->parent
= NULL
;
324 t
->root
->elems
[0] = e
;
325 LOG((" created root %p\n", t
->root
));
331 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
333 n
->kids
[0], n
->counts
[0], n
->elems
[0],
334 n
->kids
[1], n
->counts
[1], n
->elems
[1],
335 n
->kids
[2], n
->counts
[2], n
->elems
[2],
336 n
->kids
[3], n
->counts
[3]));
340 * Leaf node. We want to insert at kid position
341 * equal to the index:
348 * Internal node. We always descend through it (add
349 * always starts at the bottom, never in the
352 if (index
<= n
->counts
[0]) {
354 } else if (index
-= n
->counts
[0] + 1, index
<= n
->counts
[1]) {
356 } else if (index
-= n
->counts
[1] + 1, index
<= n
->counts
[2]) {
358 } else if (index
-= n
->counts
[2] + 1, index
<= n
->counts
[3]) {
361 return NULL
; /* error: index out of range */
364 if ((c
= t
->cmp(e
, n
->elems
[0])) < 0)
367 return n
->elems
[0]; /* already exists */
368 else if (n
->elems
[1] == NULL
|| (c
= t
->cmp(e
, n
->elems
[1])) < 0)
371 return n
->elems
[1]; /* already exists */
372 else if (n
->elems
[2] == NULL
|| (c
= t
->cmp(e
, n
->elems
[2])) < 0)
375 return n
->elems
[2]; /* already exists */
379 LOG((" moving to child %d (%p)\n", ki
, n
->kids
[ki
]));
385 add234_insert(NULL
, e
, NULL
, &t
->root
, n
, ki
);
390 void *add234(tree234
*t
, void *e
) {
391 if (!t
->cmp
) /* tree is unsorted */
394 return add234_internal(t
, e
, -1);
396 void *addpos234(tree234
*t
, void *e
, int index
) {
397 if (index
< 0 || /* index out of range */
398 t
->cmp
) /* tree is sorted */
399 return NULL
; /* return failure */
401 return add234_internal(t
, e
, index
); /* this checks the upper bound */
405 * Look up the element at a given numeric index in a 2-3-4 tree.
406 * Returns NULL if the index is out of range.
408 void *index234(tree234
*t
, int index
) {
412 return NULL
; /* tree is empty */
414 if (index
< 0 || index
>= countnode234(t
->root
))
415 return NULL
; /* out of range */
420 if (index
< n
->counts
[0])
422 else if (index
-= n
->counts
[0] + 1, index
< 0)
424 else if (index
< n
->counts
[1])
426 else if (index
-= n
->counts
[1] + 1, index
< 0)
428 else if (index
< n
->counts
[2])
430 else if (index
-= n
->counts
[2] + 1, index
< 0)
436 /* We shouldn't ever get here. I wonder how we did. */
441 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
442 * found. e is always passed as the first argument to cmp, so cmp
443 * can be an asymmetric function if desired. cmp can also be passed
444 * as NULL, in which case the compare function from the tree proper
447 void *findrelpos234(tree234
*t
, void *e
, cmpfn234 cmp
,
448 int relation
, int *index
) {
452 int idx
, ecount
, kcount
, cmpret
;
462 * Attempt to find the element itself.
467 * Prepare a fake `cmp' result if e is NULL.
471 assert(relation
== REL234_LT
|| relation
== REL234_GT
);
472 if (relation
== REL234_LT
)
473 cmpret
= +1; /* e is a max: always greater */
474 else if (relation
== REL234_GT
)
475 cmpret
= -1; /* e is a min: always smaller */
478 for (kcount
= 0; kcount
< 4; kcount
++) {
479 if (kcount
>= 3 || n
->elems
[kcount
] == NULL
||
480 (c
= cmpret ? cmpret
: cmp(e
, n
->elems
[kcount
])) < 0) {
483 if (n
->kids
[kcount
]) idx
+= n
->counts
[kcount
];
500 * We have found the element we're looking for. It's
501 * n->elems[ecount], at tree index idx. If our search
502 * relation is EQ, LE or GE we can now go home.
504 if (relation
!= REL234_LT
&& relation
!= REL234_GT
) {
505 if (index
) *index
= idx
;
506 return n
->elems
[ecount
];
510 * Otherwise, we'll do an indexed lookup for the previous
511 * or next element. (It would be perfectly possible to
512 * implement these search types in a non-counted tree by
513 * going back up from where we are, but far more fiddly.)
515 if (relation
== REL234_LT
)
521 * We've found our way to the bottom of the tree and we
522 * know where we would insert this node if we wanted to:
523 * we'd put it in in place of the (empty) subtree
524 * n->kids[kcount], and it would have index idx
526 * But the actual element isn't there. So if our search
527 * relation is EQ, we're doomed.
529 if (relation
== REL234_EQ
)
533 * Otherwise, we must do an index lookup for index idx-1
534 * (if we're going left - LE or LT) or index idx (if we're
535 * going right - GE or GT).
537 if (relation
== REL234_LT
|| relation
== REL234_LE
) {
543 * We know the index of the element we want; just call index234
544 * to do the rest. This will return NULL if the index is out of
545 * bounds, which is exactly what we want.
547 ret
= index234(t
, idx
);
548 if (ret
&& index
) *index
= idx
;
551 void *find234(tree234
*t
, void *e
, cmpfn234 cmp
) {
552 return findrelpos234(t
, e
, cmp
, REL234_EQ
, NULL
);
554 void *findrel234(tree234
*t
, void *e
, cmpfn234 cmp
, int relation
) {
555 return findrelpos234(t
, e
, cmp
, relation
, NULL
);
557 void *findpos234(tree234
*t
, void *e
, cmpfn234 cmp
, int *index
) {
558 return findrelpos234(t
, e
, cmp
, REL234_EQ
, index
);
562 * Tree transformation used in delete and split: move a subtree
563 * right, from child ki of a node to the next child. Update k and
564 * index so that they still point to the same place in the
565 * transformed tree. Assumes the destination child is not full, and
566 * that the source child does have a subtree to spare. Can cope if
567 * the destination child is undersized.
571 * [more] a A b B c d D e [more] a A b c C d D e
575 * [more] a A b B c d [more] a A b c C d
577 static void trans234_subtree_right(node234
*n
, int ki
, int *k
, int *index
) {
579 int i
, srclen
, adjust
;
582 dest
= n
->kids
[ki
+1];
584 LOG((" trans234_subtree_right(%p, %d):\n", n
, ki
));
585 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
587 n
->kids
[0], n
->counts
[0], n
->elems
[0],
588 n
->kids
[1], n
->counts
[1], n
->elems
[1],
589 n
->kids
[2], n
->counts
[2], n
->elems
[2],
590 n
->kids
[3], n
->counts
[3]));
591 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
593 src
->kids
[0], src
->counts
[0], src
->elems
[0],
594 src
->kids
[1], src
->counts
[1], src
->elems
[1],
595 src
->kids
[2], src
->counts
[2], src
->elems
[2],
596 src
->kids
[3], src
->counts
[3]));
597 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
599 dest
->kids
[0], dest
->counts
[0], dest
->elems
[0],
600 dest
->kids
[1], dest
->counts
[1], dest
->elems
[1],
601 dest
->kids
[2], dest
->counts
[2], dest
->elems
[2],
602 dest
->kids
[3], dest
->counts
[3]));
604 * Move over the rest of the destination node to make space.
606 dest
->kids
[3] = dest
->kids
[2]; dest
->counts
[3] = dest
->counts
[2];
607 dest
->elems
[2] = dest
->elems
[1];
608 dest
->kids
[2] = dest
->kids
[1]; dest
->counts
[2] = dest
->counts
[1];
609 dest
->elems
[1] = dest
->elems
[0];
610 dest
->kids
[1] = dest
->kids
[0]; dest
->counts
[1] = dest
->counts
[0];
612 /* which element to move over */
613 i
= (src
->elems
[2] ?
2 : src
->elems
[1] ?
1 : 0);
615 dest
->elems
[0] = n
->elems
[ki
];
616 n
->elems
[ki
] = src
->elems
[i
];
617 src
->elems
[i
] = NULL
;
619 dest
->kids
[0] = src
->kids
[i
+1]; dest
->counts
[0] = src
->counts
[i
+1];
620 src
->kids
[i
+1] = NULL
; src
->counts
[i
+1] = 0;
622 if (dest
->kids
[0]) dest
->kids
[0]->parent
= dest
;
624 adjust
= dest
->counts
[0] + 1;
626 n
->counts
[ki
] -= adjust
;
627 n
->counts
[ki
+1] += adjust
;
629 srclen
= n
->counts
[ki
];
632 LOG((" before: k,index = %d,%d\n", (*k
), (*index
)));
633 if ((*k
) == ki
&& (*index
) > srclen
) {
634 (*index
) -= srclen
+ 1;
636 } else if ((*k
) == ki
+1) {
639 LOG((" after: k,index = %d,%d\n", (*k
), (*index
)));
642 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
644 n
->kids
[0], n
->counts
[0], n
->elems
[0],
645 n
->kids
[1], n
->counts
[1], n
->elems
[1],
646 n
->kids
[2], n
->counts
[2], n
->elems
[2],
647 n
->kids
[3], n
->counts
[3]));
648 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
650 src
->kids
[0], src
->counts
[0], src
->elems
[0],
651 src
->kids
[1], src
->counts
[1], src
->elems
[1],
652 src
->kids
[2], src
->counts
[2], src
->elems
[2],
653 src
->kids
[3], src
->counts
[3]));
654 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
656 dest
->kids
[0], dest
->counts
[0], dest
->elems
[0],
657 dest
->kids
[1], dest
->counts
[1], dest
->elems
[1],
658 dest
->kids
[2], dest
->counts
[2], dest
->elems
[2],
659 dest
->kids
[3], dest
->counts
[3]));
663 * Tree transformation used in delete and split: move a subtree
664 * left, from child ki of a node to the previous child. Update k
665 * and index so that they still point to the same place in the
666 * transformed tree. Assumes the destination child is not full, and
667 * that the source child does have a subtree to spare. Can cope if
668 * the destination child is undersized.
672 * a A b c C d D e [more] a A b B c d D e [more]
676 * a b B c C d [more] a A b c C d [more]
678 static void trans234_subtree_left(node234
*n
, int ki
, int *k
, int *index
) {
683 dest
= n
->kids
[ki
-1];
685 LOG((" trans234_subtree_left(%p, %d):\n", n
, ki
));
686 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
688 n
->kids
[0], n
->counts
[0], n
->elems
[0],
689 n
->kids
[1], n
->counts
[1], n
->elems
[1],
690 n
->kids
[2], n
->counts
[2], n
->elems
[2],
691 n
->kids
[3], n
->counts
[3]));
692 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
694 dest
->kids
[0], dest
->counts
[0], dest
->elems
[0],
695 dest
->kids
[1], dest
->counts
[1], dest
->elems
[1],
696 dest
->kids
[2], dest
->counts
[2], dest
->elems
[2],
697 dest
->kids
[3], dest
->counts
[3]));
698 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
700 src
->kids
[0], src
->counts
[0], src
->elems
[0],
701 src
->kids
[1], src
->counts
[1], src
->elems
[1],
702 src
->kids
[2], src
->counts
[2], src
->elems
[2],
703 src
->kids
[3], src
->counts
[3]));
705 /* where in dest to put it */
706 i
= (dest
->elems
[1] ?
2 : dest
->elems
[0] ?
1 : 0);
707 dest
->elems
[i
] = n
->elems
[ki
-1];
708 n
->elems
[ki
-1] = src
->elems
[0];
710 dest
->kids
[i
+1] = src
->kids
[0]; dest
->counts
[i
+1] = src
->counts
[0];
712 if (dest
->kids
[i
+1]) dest
->kids
[i
+1]->parent
= dest
;
715 * Move over the rest of the source node.
717 src
->kids
[0] = src
->kids
[1]; src
->counts
[0] = src
->counts
[1];
718 src
->elems
[0] = src
->elems
[1];
719 src
->kids
[1] = src
->kids
[2]; src
->counts
[1] = src
->counts
[2];
720 src
->elems
[1] = src
->elems
[2];
721 src
->kids
[2] = src
->kids
[3]; src
->counts
[2] = src
->counts
[3];
722 src
->elems
[2] = NULL
;
723 src
->kids
[3] = NULL
; src
->counts
[3] = 0;
725 adjust
= dest
->counts
[i
+1] + 1;
727 n
->counts
[ki
] -= adjust
;
728 n
->counts
[ki
-1] += adjust
;
731 LOG((" before: k,index = %d,%d\n", (*k
), (*index
)));
735 (*index
) += n
->counts
[ki
-1] + 1;
739 LOG((" after: k,index = %d,%d\n", (*k
), (*index
)));
742 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
744 n
->kids
[0], n
->counts
[0], n
->elems
[0],
745 n
->kids
[1], n
->counts
[1], n
->elems
[1],
746 n
->kids
[2], n
->counts
[2], n
->elems
[2],
747 n
->kids
[3], n
->counts
[3]));
748 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
750 dest
->kids
[0], dest
->counts
[0], dest
->elems
[0],
751 dest
->kids
[1], dest
->counts
[1], dest
->elems
[1],
752 dest
->kids
[2], dest
->counts
[2], dest
->elems
[2],
753 dest
->kids
[3], dest
->counts
[3]));
754 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
756 src
->kids
[0], src
->counts
[0], src
->elems
[0],
757 src
->kids
[1], src
->counts
[1], src
->elems
[1],
758 src
->kids
[2], src
->counts
[2], src
->elems
[2],
759 src
->kids
[3], src
->counts
[3]));
763 * Tree transformation used in delete and split: merge child nodes
764 * ki and ki+1 of a node. Update k and index so that they still
765 * point to the same place in the transformed tree. Assumes both
766 * children _are_ sufficiently small.
770 * a A b c C d a A b B c C d
772 * This routine can also cope with either child being undersized:
780 * a b B c C d a A b B c C d
782 static void trans234_subtree_merge(node234
*n
, int ki
, int *k
, int *index
) {
783 node234
*left
, *right
;
784 int i
, leftlen
, rightlen
, lsize
, rsize
;
786 left
= n
->kids
[ki
]; leftlen
= n
->counts
[ki
];
787 right
= n
->kids
[ki
+1]; rightlen
= n
->counts
[ki
+1];
789 LOG((" trans234_subtree_merge(%p, %d):\n", n
, ki
));
790 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
792 n
->kids
[0], n
->counts
[0], n
->elems
[0],
793 n
->kids
[1], n
->counts
[1], n
->elems
[1],
794 n
->kids
[2], n
->counts
[2], n
->elems
[2],
795 n
->kids
[3], n
->counts
[3]));
796 LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
798 left
->kids
[0], left
->counts
[0], left
->elems
[0],
799 left
->kids
[1], left
->counts
[1], left
->elems
[1],
800 left
->kids
[2], left
->counts
[2], left
->elems
[2],
801 left
->kids
[3], left
->counts
[3]));
802 LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
804 right
->kids
[0], right
->counts
[0], right
->elems
[0],
805 right
->kids
[1], right
->counts
[1], right
->elems
[1],
806 right
->kids
[2], right
->counts
[2], right
->elems
[2],
807 right
->kids
[3], right
->counts
[3]));
809 assert(!left
->elems
[2] && !right
->elems
[2]); /* neither is large! */
810 lsize
= (left
->elems
[1] ?
2 : left
->elems
[0] ?
1 : 0);
811 rsize
= (right
->elems
[1] ?
2 : right
->elems
[0] ?
1 : 0);
813 left
->elems
[lsize
] = n
->elems
[ki
];
815 for (i
= 0; i
< rsize
+1; i
++) {
816 left
->kids
[lsize
+1+i
] = right
->kids
[i
];
817 left
->counts
[lsize
+1+i
] = right
->counts
[i
];
818 if (left
->kids
[lsize
+1+i
])
819 left
->kids
[lsize
+1+i
]->parent
= left
;
821 left
->elems
[lsize
+1+i
] = right
->elems
[i
];
824 n
->counts
[ki
] += rightlen
+ 1;
829 * Move the rest of n up by one.
831 for (i
= ki
+1; i
< 3; i
++) {
832 n
->kids
[i
] = n
->kids
[i
+1];
833 n
->counts
[i
] = n
->counts
[i
+1];
835 for (i
= ki
; i
< 2; i
++) {
836 n
->elems
[i
] = n
->elems
[i
+1];
843 LOG((" before: k,index = %d,%d\n", (*k
), (*index
)));
846 (*index
) += leftlen
+ 1;
847 } else if ((*k
) > ki
+1) {
850 LOG((" after: k,index = %d,%d\n", (*k
), (*index
)));
853 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
855 n
->kids
[0], n
->counts
[0], n
->elems
[0],
856 n
->kids
[1], n
->counts
[1], n
->elems
[1],
857 n
->kids
[2], n
->counts
[2], n
->elems
[2],
858 n
->kids
[3], n
->counts
[3]));
859 LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
861 left
->kids
[0], left
->counts
[0], left
->elems
[0],
862 left
->kids
[1], left
->counts
[1], left
->elems
[1],
863 left
->kids
[2], left
->counts
[2], left
->elems
[2],
864 left
->kids
[3], left
->counts
[3]));
869 * Delete an element e in a 2-3-4 tree. Does not free the element,
870 * merely removes all links to it from the tree nodes.
872 static void *delpos234_internal(tree234
*t
, int index
) {
879 n
= t
->root
; /* by assumption this is non-NULL */
880 LOG(("deleting item %d from tree %p\n", index
, t
));
884 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
886 n
->kids
[0], n
->counts
[0], n
->elems
[0],
887 n
->kids
[1], n
->counts
[1], n
->elems
[1],
888 n
->kids
[2], n
->counts
[2], n
->elems
[2],
889 n
->kids
[3], n
->counts
[3],
891 if (index
<= n
->counts
[0]) {
893 } else if (index
-= n
->counts
[0]+1, index
<= n
->counts
[1]) {
895 } else if (index
-= n
->counts
[1]+1, index
<= n
->counts
[2]) {
897 } else if (index
-= n
->counts
[2]+1, index
<= n
->counts
[3]) {
900 assert(0); /* can't happen */
904 break; /* n is a leaf node; we're here! */
907 * Check to see if we've found our target element. If so,
908 * we must choose a new target (we'll use the old target's
909 * successor, which will be in a leaf), move it into the
910 * place of the old one, continue down to the leaf and
911 * delete the old copy of the new target.
913 if (index
== n
->counts
[ki
]) {
915 LOG((" found element in internal node, index %d\n", ki
));
916 assert(n
->elems
[ki
]); /* must be a kid _before_ an element */
918 for (m
= n
->kids
[ki
]; m
->kids
[0]; m
= m
->kids
[0])
920 LOG((" replacing with element \"%s\" from leaf node %p\n",
922 retval
= n
->elems
[ki
-1];
923 n
->elems
[ki
-1] = m
->elems
[0];
927 * Recurse down to subtree ki. If it has only one element,
928 * we have to do some transformation to start with.
930 LOG((" moving to subtree %d\n", ki
));
932 if (!sub
->elems
[1]) {
933 LOG((" subtree has only one element!\n"));
934 if (ki
> 0 && n
->kids
[ki
-1]->elems
[1]) {
936 * Child ki has only one element, but child
937 * ki-1 has two or more. So we need to move a
938 * subtree from ki-1 to ki.
940 trans234_subtree_right(n
, ki
-1, &ki
, &index
);
941 } else if (ki
< 3 && n
->kids
[ki
+1] &&
942 n
->kids
[ki
+1]->elems
[1]) {
944 * Child ki has only one element, but ki+1 has
945 * two or more. Move a subtree from ki+1 to ki.
947 trans234_subtree_left(n
, ki
+1, &ki
, &index
);
950 * ki is small with only small neighbours. Pick a
951 * neighbour and merge with it.
953 trans234_subtree_merge(n
, ki
>0 ? ki
-1 : ki
, &ki
, &index
);
958 * The root is empty and needs to be
961 LOG((" shifting root!\n"));
976 * Now n is a leaf node, and ki marks the element number we
977 * want to delete. We've already arranged for the leaf to be
978 * bigger than minimum size, so let's just go to it.
982 retval
= n
->elems
[ki
];
984 for (i
= ki
; i
< 2 && n
->elems
[i
+1]; i
++)
985 n
->elems
[i
] = n
->elems
[i
+1];
989 * It's just possible that we have reduced the leaf to zero
990 * size. This can only happen if it was the root - so destroy
991 * it and make the tree empty.
994 LOG((" removed last element in tree, destroying empty root\n"));
995 assert(n
== t
->root
);
1000 return retval
; /* finished! */
1002 void *delpos234(tree234
*t
, int index
) {
1003 if (index
< 0 || index
>= countnode234(t
->root
))
1005 return delpos234_internal(t
, index
);
1007 void *del234(tree234
*t
, void *e
) {
1009 if (!findrelpos234(t
, e
, NULL
, REL234_EQ
, &index
))
1010 return NULL
; /* it wasn't in there anyway */
1011 return delpos234_internal(t
, index
); /* it's there; delete it. */
1015 * Join two subtrees together with a separator element between
1016 * them, given their relative height.
1018 * (Height<0 means the left tree is shorter, >0 means the right
1019 * tree is shorter, =0 means (duh) they're equal.)
1021 * It is assumed that any checks needed on the ordering criterion
1022 * have _already_ been done.
1024 * The value returned in `height' is 0 or 1 depending on whether the
1025 * resulting tree is the same height as the original larger one, or
1028 static node234
*join234_internal(node234
*left
, void *sep
,
1029 node234
*right
, int *height
) {
1030 node234
*root
, *node
;
1031 int relht
= *height
;
1034 LOG((" join: joining %p \"%s\" %p, relative height is %d\n",
1035 left
, sep
, right
, relht
));
1038 * The trees are the same height. Create a new one-element
1039 * root containing the separator and pointers to the two
1043 newroot
= snew(node234
);
1044 newroot
->kids
[0] = left
; newroot
->counts
[0] = countnode234(left
);
1045 newroot
->elems
[0] = sep
;
1046 newroot
->kids
[1] = right
; newroot
->counts
[1] = countnode234(right
);
1047 newroot
->elems
[1] = NULL
;
1048 newroot
->kids
[2] = NULL
; newroot
->counts
[2] = 0;
1049 newroot
->elems
[2] = NULL
;
1050 newroot
->kids
[3] = NULL
; newroot
->counts
[3] = 0;
1051 newroot
->parent
= NULL
;
1052 if (left
) left
->parent
= newroot
;
1053 if (right
) right
->parent
= newroot
;
1055 LOG((" join: same height, brand new root\n"));
1060 * This now works like the addition algorithm on the larger
1061 * tree. We're replacing a single kid pointer with two kid
1062 * pointers separated by an element; if that causes the node to
1063 * overload, we split it in two, move a separator element up to
1064 * the next node, and repeat.
1068 * Left tree is shorter. Search down the right tree to find
1069 * the pointer we're inserting at.
1071 node
= root
= right
;
1072 while (++relht
< 0) {
1073 node
= node
->kids
[0];
1076 right
= node
->kids
[ki
];
1079 * Right tree is shorter; search down the left to find the
1080 * pointer we're inserting at.
1083 while (--relht
> 0) {
1085 node
= node
->kids
[3];
1086 else if (node
->elems
[1])
1087 node
= node
->kids
[2];
1089 node
= node
->kids
[1];
1093 else if (node
->elems
[1])
1097 left
= node
->kids
[ki
];
1101 * Now proceed as for addition.
1103 *height
= add234_insert(left
, sep
, right
, &root
, node
, ki
);
1107 static int height234(tree234
*t
) {
1109 node234
*n
= t
->root
;
1116 tree234
*join234(tree234
*t1
, tree234
*t2
) {
1117 int size2
= countnode234(t2
->root
);
1123 element
= index234(t2
, 0);
1124 element
= findrelpos234(t1
, element
, NULL
, REL234_GE
, NULL
);
1129 element
= delpos234(t2
, 0);
1130 relht
= height234(t1
) - height234(t2
);
1131 t1
->root
= join234_internal(t1
->root
, element
, t2
->root
, &relht
);
1136 tree234
*join234r(tree234
*t1
, tree234
*t2
) {
1137 int size1
= countnode234(t1
->root
);
1143 element
= index234(t1
, size1
-1);
1144 element
= findrelpos234(t2
, element
, NULL
, REL234_LE
, NULL
);
1149 element
= delpos234(t1
, size1
-1);
1150 relht
= height234(t1
) - height234(t2
);
1151 t2
->root
= join234_internal(t1
->root
, element
, t2
->root
, &relht
);
1158 * Split out the first <index> elements in a tree and return a
1159 * pointer to the root node. Leave the root node of the remainder
1162 static node234
*split234_internal(tree234
*t
, int index
) {
1163 node234
*halves
[2], *n
, *sib
, *sub
;
1164 node234
*lparent
, *rparent
;
1165 int ki
, pki
, i
, half
, lcount
, rcount
;
1168 LOG(("splitting tree %p at point %d\n", t
, index
));
1171 * Easy special cases. After this we have also dealt completely
1172 * with the empty-tree case and we can assume the root exists.
1174 if (index
== 0) /* return nothing */
1176 if (index
== countnode234(t
->root
)) { /* return the whole tree */
1177 node234
*ret
= t
->root
;
1183 * Search down the tree to find the split point.
1185 lparent
= rparent
= NULL
;
1187 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
1189 n
->kids
[0], n
->counts
[0], n
->elems
[0],
1190 n
->kids
[1], n
->counts
[1], n
->elems
[1],
1191 n
->kids
[2], n
->counts
[2], n
->elems
[2],
1192 n
->kids
[3], n
->counts
[3],
1195 rcount
= countnode234(n
) - lcount
;
1196 if (index
<= n
->counts
[0]) {
1198 } else if (index
-= n
->counts
[0]+1, index
<= n
->counts
[1]) {
1200 } else if (index
-= n
->counts
[1]+1, index
<= n
->counts
[2]) {
1203 index
-= n
->counts
[2]+1;
1207 LOG((" splitting at subtree %d\n", ki
));
1210 LOG((" splitting at child index %d\n", ki
));
1213 * Split the node, put halves[0] on the right of the left
1214 * one and halves[1] on the left of the right one, put the
1215 * new node pointers in halves[0] and halves[1], and go up
1218 sib
= snew(node234
);
1219 for (i
= 0; i
< 3; i
++) {
1220 if (i
+ki
< 3 && n
->elems
[i
+ki
]) {
1221 sib
->elems
[i
] = n
->elems
[i
+ki
];
1222 sib
->kids
[i
+1] = n
->kids
[i
+ki
+1];
1223 if (sib
->kids
[i
+1]) sib
->kids
[i
+1]->parent
= sib
;
1224 sib
->counts
[i
+1] = n
->counts
[i
+ki
+1];
1225 n
->elems
[i
+ki
] = NULL
;
1226 n
->kids
[i
+ki
+1] = NULL
;
1227 n
->counts
[i
+ki
+1] = 0;
1229 sib
->elems
[i
] = NULL
;
1230 sib
->kids
[i
+1] = NULL
;
1231 sib
->counts
[i
+1] = 0;
1235 lparent
->kids
[pki
] = n
;
1236 lparent
->counts
[pki
] = lcount
;
1237 n
->parent
= lparent
;
1238 rparent
->kids
[0] = sib
;
1239 rparent
->counts
[0] = rcount
;
1240 sib
->parent
= rparent
;
1250 LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1252 n
->kids
[0], n
->counts
[0], n
->elems
[0],
1253 n
->kids
[1], n
->counts
[1], n
->elems
[1],
1254 n
->kids
[2], n
->counts
[2], n
->elems
[2],
1255 n
->kids
[3], n
->counts
[3]));
1256 LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1258 sib
->kids
[0], sib
->counts
[0], sib
->elems
[0],
1259 sib
->kids
[1], sib
->counts
[1], sib
->elems
[1],
1260 sib
->kids
[2], sib
->counts
[2], sib
->elems
[2],
1261 sib
->kids
[3], sib
->counts
[3]));
1267 * We've come off the bottom here, so we've successfully split
1268 * the tree into two equally high subtrees. The only problem is
1269 * that some of the nodes down the fault line will be smaller
1270 * than the minimum permitted size. (Since this is a 2-3-4
1271 * tree, that means they'll be zero-element one-child nodes.)
1273 LOG((" fell off bottom, lroot is %p, rroot is %p\n",
1274 halves
[0], halves
[1]));
1275 lparent
->counts
[pki
] = rparent
->counts
[0] = 0;
1276 lparent
->kids
[pki
] = rparent
->kids
[0] = NULL
;
1279 * So now we go back down the tree from each of the two roots,
1280 * fixing up undersize nodes.
1282 for (half
= 0; half
< 2; half
++) {
1284 * Remove the root if it's undersize (it will contain only
1285 * one child pointer, so just throw it away and replace it
1286 * with its child). This might happen several times.
1288 while (halves
[half
] && !halves
[half
]->elems
[0]) {
1289 LOG((" root %p is undersize, throwing away\n", halves
[half
]));
1290 halves
[half
] = halves
[half
]->kids
[0];
1291 sfree(halves
[half
]->parent
);
1292 halves
[half
]->parent
= NULL
;
1293 LOG((" new root is %p\n", halves
[half
]));
1298 void (*toward
)(node234
*n
, int ki
, int *k
, int *index
);
1302 * Now we have a potentially undersize node on the
1303 * right (if half==0) or left (if half==1). Sort it
1304 * out, by merging with a neighbour or by transferring
1305 * subtrees over. At this time we must also ensure that
1306 * nodes are bigger than minimum, in case we need an
1307 * element to merge two nodes below.
1309 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1311 n
->kids
[0], n
->counts
[0], n
->elems
[0],
1312 n
->kids
[1], n
->counts
[1], n
->elems
[1],
1313 n
->kids
[2], n
->counts
[2], n
->elems
[2],
1314 n
->kids
[3], n
->counts
[3]));
1316 ki
= 0; /* the kid we're interested in */
1317 ni
= 1; /* the neighbour */
1318 merge
= 0; /* for merge: leftmost of the two */
1319 toward
= trans234_subtree_left
;
1321 ki
= (n
->kids
[3] ?
3 : n
->kids
[2] ?
2 : 1);
1324 toward
= trans234_subtree_right
;
1328 if (sub
&& !sub
->elems
[1]) {
1330 * This node is undersized or minimum-size. If we
1331 * can merge it with its neighbour, we do so;
1332 * otherwise we must be able to transfer subtrees
1333 * over to it until it is greater than minimum
1336 int undersized
= (!sub
->elems
[0]);
1337 LOG((" child %d is %ssize\n", ki
,
1338 undersized ?
"under" : "minimum-"));
1339 LOG((" neighbour is %s\n",
1340 n
->kids
[ni
]->elems
[2] ?
"large" :
1341 n
->kids
[ni
]->elems
[1] ?
"medium" : "small"));
1342 if (!n
->kids
[ni
]->elems
[1] ||
1343 (undersized
&& !n
->kids
[ni
]->elems
[2])) {
1345 * Neighbour is small, or possibly neighbour is
1346 * medium and we are undersize.
1348 trans234_subtree_merge(n
, merge
, NULL
, NULL
);
1349 sub
= n
->kids
[merge
];
1352 * n is empty, and hence must have been the
1353 * root and needs to be removed.
1356 LOG((" shifting root!\n"));
1358 halves
[half
]->parent
= NULL
;
1362 /* Neighbour is big enough to move trees over. */
1363 toward(n
, ni
, NULL
, NULL
);
1365 toward(n
, ni
, NULL
, NULL
);
1372 t
->root
= halves
[1];
1375 tree234
*splitpos234(tree234
*t
, int index
, int before
) {
1380 count
= countnode234(t
->root
);
1381 if (index
< 0 || index
> count
)
1382 return NULL
; /* error */
1383 ret
= newtree234(t
->cmp
);
1384 n
= split234_internal(t
, index
);
1386 /* We want to return the ones before the index. */
1390 * We want to keep the ones before the index and return the
1393 ret
->root
= t
->root
;
1398 tree234
*split234(tree234
*t
, void *e
, cmpfn234 cmp
, int rel
) {
1402 assert(rel
!= REL234_EQ
);
1404 if (rel
== REL234_GT
|| rel
== REL234_GE
) {
1406 rel
= (rel
== REL234_GT ? REL234_LE
: REL234_LT
);
1410 if (!findrelpos234(t
, e
, cmp
, rel
, &index
))
1413 return splitpos234(t
, index
+1, before
);
1416 static node234
*copynode234(node234
*n
, copyfn234 copyfn
, void *copyfnstate
) {
1418 node234
*n2
= snew(node234
);
1420 for (i
= 0; i
< 3; i
++) {
1421 if (n
->elems
[i
] && copyfn
)
1422 n2
->elems
[i
] = copyfn(copyfnstate
, n
->elems
[i
]);
1424 n2
->elems
[i
] = n
->elems
[i
];
1427 for (i
= 0; i
< 4; i
++) {
1429 n2
->kids
[i
] = copynode234(n
->kids
[i
], copyfn
, copyfnstate
);
1430 n2
->kids
[i
]->parent
= n2
;
1434 n2
->counts
[i
] = n
->counts
[i
];
1439 tree234
*copytree234(tree234
*t
, copyfn234 copyfn
, void *copyfnstate
) {
1442 t2
= newtree234(t
->cmp
);
1444 t2
->root
= copynode234(t
->root
, copyfn
, copyfnstate
);
1445 t2
->root
->parent
= NULL
;
1455 * Test code for the 2-3-4 tree. This code maintains an alternative
1456 * representation of the data in the tree, in an array (using the
1457 * obvious and slow insert and delete functions). After each tree
1458 * operation, the verify() function is called, which ensures all
1459 * the tree properties are preserved:
1460 * - node->child->parent always equals node
1461 * - tree->root->parent always equals NULL
1462 * - number of kids == 0 or number of elements + 1;
1463 * - tree has the same depth everywhere
1464 * - every node has at least one element
1465 * - subtree element counts are accurate
1466 * - any NULL kid pointer is accompanied by a zero count
1467 * - in a sorted tree: ordering property between elements of a
1468 * node and elements of its children is preserved
1469 * and also ensures the list represented by the tree is the same
1470 * list it should be. (This last check also doubly verifies the
1471 * ordering properties, because the `same list it should be' is by
1472 * definition correctly ordered. It also ensures all nodes are
1473 * distinct, because the enum functions would get caught in a loop
1479 #define srealloc realloc
1482 * Error reporting function.
1484 void error(char *fmt
, ...) {
1488 vfprintf(stdout
, fmt
, ap
);
1493 /* The array representation of the data. */
1495 int arraylen
, arraysize
;
1498 /* The tree representation of the same data. */
1502 * Routines to provide a diagnostic printout of a tree. Currently
1503 * relies on every element in the tree being a one-character string
1510 int dispnode(node234
*n
, int level
, dispctx
*ctx
) {
1512 int xpos
= strlen(ctx
->levels
[0]);
1516 len
= sprintf(ctx
->levels
[0]+xpos
, " %s%s%s",
1517 n
->elems
[0], n
->elems
[1], n
->elems
[2]);
1518 else if (n
->elems
[1])
1519 len
= sprintf(ctx
->levels
[0]+xpos
, " %s%s",
1520 n
->elems
[0], n
->elems
[1]);
1522 len
= sprintf(ctx
->levels
[0]+xpos
, " %s",
1524 return xpos
+ 1 + (len
-1) / 2;
1527 int nodelen
, mypos
, myleft
, x
, i
;
1529 xpos
[0] = dispnode(n
->kids
[0], level
-3, ctx
);
1530 xpos
[1] = dispnode(n
->kids
[1], level
-3, ctx
);
1533 xpos
[2] = dispnode(n
->kids
[2], level
-3, ctx
);
1537 xpos
[3] = dispnode(n
->kids
[3], level
-3, ctx
);
1542 mypos
= (xpos
[1] + xpos
[2]) / 2;
1543 else if (nkids
== 3)
1546 mypos
= (xpos
[0] + xpos
[1]) / 2;
1547 nodelen
= nkids
* 2 - 1;
1548 myleft
= mypos
- ((nodelen
-1)/2);
1549 assert(myleft
>= xpos
[0]);
1550 assert(myleft
+ nodelen
-1 <= xpos
[nkids
-1]);
1552 x
= strlen(ctx
->levels
[level
]);
1553 while (x
<= xpos
[0] && x
< myleft
)
1554 ctx
->levels
[level
][x
++] = ' ';
1556 ctx
->levels
[level
][x
++] = '_';
1558 x
+= sprintf(ctx
->levels
[level
]+x
, ".%s.%s.%s.",
1559 n
->elems
[0], n
->elems
[1], n
->elems
[2]);
1561 x
+= sprintf(ctx
->levels
[level
]+x
, ".%s.%s.",
1562 n
->elems
[0], n
->elems
[1]);
1564 x
+= sprintf(ctx
->levels
[level
]+x
, ".%s.",
1566 while (x
< xpos
[nkids
-1])
1567 ctx
->levels
[level
][x
++] = '_';
1568 ctx
->levels
[level
][x
] = '\0';
1570 x
= strlen(ctx
->levels
[level
-1]);
1571 for (i
= 0; i
< nkids
; i
++) {
1574 if (i
> 0 && i
< nkids
-1)
1580 while (x
< pos
&& x
< rpos
)
1581 ctx
->levels
[level
-1][x
++] = ' ';
1583 ctx
->levels
[level
-1][x
++] = '|';
1584 while (x
< pos
|| x
< rpos
)
1585 ctx
->levels
[level
-1][x
++] = '_';
1587 ctx
->levels
[level
-1][x
++] = '|';
1589 ctx
->levels
[level
-1][x
] = '\0';
1591 x
= strlen(ctx
->levels
[level
-2]);
1592 for (i
= 0; i
< nkids
; i
++) {
1596 ctx
->levels
[level
-2][x
++] = ' ';
1597 ctx
->levels
[level
-2][x
++] = '|';
1599 ctx
->levels
[level
-2][x
] = '\0';
1605 void disptree(tree234
*t
) {
1608 int width
= count234(t
);
1609 int ht
= height234(t
) * 3 - 2;
1613 printf("[empty tree]\n");
1616 leveldata
= smalloc(ht
* (width
+2));
1617 ctx
.levels
= smalloc(ht
* sizeof(char *));
1618 for (i
= 0; i
< ht
; i
++) {
1619 ctx
.levels
[i
] = leveldata
+ i
* (width
+2);
1620 ctx
.levels
[i
][0] = '\0';
1623 (void) dispnode(t
->root
, ht
-1, &ctx
);
1626 printf("%s\n", ctx
.levels
[i
]);
1637 int chknode(chkctx
*ctx
, int level
, node234
*node
,
1638 void *lowbound
, void *highbound
) {
1643 /* Count the non-NULL kids. */
1644 for (nkids
= 0; nkids
< 4 && node
->kids
[nkids
]; nkids
++);
1645 /* Ensure no kids beyond the first NULL are non-NULL. */
1646 for (i
= nkids
; i
< 4; i
++)
1647 if (node
->kids
[i
]) {
1648 error("node %p: nkids=%d but kids[%d] non-NULL",
1650 } else if (node
->counts
[i
]) {
1651 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
1652 node
, i
, i
, node
->counts
[i
]);
1655 /* Count the non-NULL elements. */
1656 for (nelems
= 0; nelems
< 3 && node
->elems
[nelems
]; nelems
++);
1657 /* Ensure no elements beyond the first NULL are non-NULL. */
1658 for (i
= nelems
; i
< 3; i
++)
1659 if (node
->elems
[i
]) {
1660 error("node %p: nelems=%d but elems[%d] non-NULL",
1666 * If nkids==0, this is a leaf node; verify that the tree
1667 * depth is the same everywhere.
1669 if (ctx
->treedepth
< 0)
1670 ctx
->treedepth
= level
; /* we didn't know the depth yet */
1671 else if (ctx
->treedepth
!= level
)
1672 error("node %p: leaf at depth %d, previously seen depth %d",
1673 node
, level
, ctx
->treedepth
);
1676 * If nkids != 0, then it should be nelems+1, unless nelems
1677 * is 0 in which case nkids should also be 0 (and so we
1678 * shouldn't be in this condition at all).
1680 int shouldkids
= (nelems ? nelems
+1 : 0);
1681 if (nkids
!= shouldkids
) {
1682 error("node %p: %d elems should mean %d kids but has %d",
1683 node
, nelems
, shouldkids
, nkids
);
1688 * nelems should be at least 1.
1691 error("node %p: no elems", node
, nkids
);
1695 * Add nelems to the running element count of the whole tree.
1697 ctx
->elemcount
+= nelems
;
1700 * Check ordering property: all elements should be strictly >
1701 * lowbound, strictly < highbound, and strictly < each other in
1702 * sequence. (lowbound and highbound are NULL at edges of tree
1703 * - both NULL at root node - and NULL is considered to be <
1704 * everything and > everything. IYSWIM.)
1707 for (i
= -1; i
< nelems
; i
++) {
1708 void *lower
= (i
== -1 ? lowbound
: node
->elems
[i
]);
1709 void *higher
= (i
+1 == nelems ? highbound
: node
->elems
[i
+1]);
1710 if (lower
&& higher
&& cmp(lower
, higher
) >= 0) {
1711 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1712 node
, i
, lower
, i
+1, higher
);
1718 * Check parent pointers: all non-NULL kids should have a
1719 * parent pointer coming back to this node.
1721 for (i
= 0; i
< nkids
; i
++)
1722 if (node
->kids
[i
]->parent
!= node
) {
1723 error("node %p kid %d: parent ptr is %p not %p",
1724 node
, i
, node
->kids
[i
]->parent
, node
);
1729 * Now (finally!) recurse into subtrees.
1733 for (i
= 0; i
< nkids
; i
++) {
1734 void *lower
= (i
== 0 ? lowbound
: node
->elems
[i
-1]);
1735 void *higher
= (i
>= nelems ? highbound
: node
->elems
[i
]);
1736 int subcount
= chknode(ctx
, level
+1, node
->kids
[i
], lower
, higher
);
1737 if (node
->counts
[i
] != subcount
) {
1738 error("node %p kid %d: count says %d, subtree really has %d",
1739 node
, i
, node
->counts
[i
], subcount
);
1747 void verifytree(tree234
*tree
, void **array
, int arraylen
) {
1752 ctx
.treedepth
= -1; /* depth unknown yet */
1753 ctx
.elemcount
= 0; /* no elements seen yet */
1755 * Verify validity of tree properties.
1758 if (tree
->root
->parent
!= NULL
)
1759 error("root->parent is %p should be null", tree
->root
->parent
);
1760 chknode(&ctx
, 0, tree
->root
, NULL
, NULL
);
1762 printf("tree depth: %d\n", ctx
.treedepth
);
1764 * Enumerate the tree and ensure it matches up to the array.
1766 for (i
= 0; NULL
!= (p
= index234(tree
, i
)); i
++) {
1768 error("tree contains more than %d elements", arraylen
);
1770 error("enum at position %d: array says %s, tree says %s",
1773 if (ctx
.elemcount
!= i
) {
1774 error("tree really contains %d elements, enum gave %d",
1778 error("enum gave only %d elements, array has %d", i
, arraylen
);
1781 if (ctx
.elemcount
!= i
) {
1782 error("tree really contains %d elements, count234 gave %d",
1786 void verify(void) { verifytree(tree
, array
, arraylen
); }
1788 void internal_addtest(void *elem
, int index
, void *realret
) {
1792 if (arraysize
< arraylen
+1) {
1793 arraysize
= arraylen
+1+256;
1794 array
= (array
== NULL ?
smalloc(arraysize
*sizeof(*array
)) :
1795 srealloc(array
, arraysize
*sizeof(*array
)));
1799 /* now i points to the first element >= elem */
1800 retval
= elem
; /* expect elem returned (success) */
1801 for (j
= arraylen
; j
> i
; j
--)
1802 array
[j
] = array
[j
-1];
1803 array
[i
] = elem
; /* add elem to array */
1806 if (realret
!= retval
) {
1807 error("add: retval was %p expected %p", realret
, retval
);
1813 void addtest(void *elem
) {
1817 realret
= add234(tree
, elem
);
1820 while (i
< arraylen
&& cmp(elem
, array
[i
]) > 0)
1822 if (i
< arraylen
&& !cmp(elem
, array
[i
])) {
1823 void *retval
= array
[i
]; /* expect that returned not elem */
1824 if (realret
!= retval
) {
1825 error("add: retval was %p expected %p", realret
, retval
);
1828 internal_addtest(elem
, i
, realret
);
1831 void addpostest(void *elem
, int i
) {
1834 realret
= addpos234(tree
, elem
, i
);
1836 internal_addtest(elem
, i
, realret
);
1839 void delpostest(int i
) {
1841 void *elem
= array
[i
], *ret
;
1843 /* i points to the right element */
1844 while (i
< arraylen
-1) {
1845 array
[i
] = array
[i
+1];
1848 arraylen
--; /* delete elem from array */
1851 ret
= del234(tree
, elem
);
1853 ret
= delpos234(tree
, index
);
1856 error("del returned %p, expected %p", ret
, elem
);
1862 void deltest(void *elem
) {
1866 while (i
< arraylen
&& cmp(elem
, array
[i
]) > 0)
1868 if (i
>= arraylen
|| cmp(elem
, array
[i
]) != 0)
1869 return; /* don't do it! */
1873 /* A sample data set and test utility. Designed for pseudo-randomness,
1874 * and yet repeatability. */
1877 * This random number generator uses the `portable implementation'
1878 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1881 int randomnumber(unsigned *seed
) {
1882 *seed
*= 1103515245;
1884 return ((*seed
) / 65536) % 32768;
1887 int mycmp(void *av
, void *bv
) {
1888 char const *a
= (char const *)av
;
1889 char const *b
= (char const *)bv
;
1890 return strcmp(a
, b
);
1893 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1896 "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
1897 "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
1898 "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
1899 "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
1902 "a", "ab", "absque", "coram", "de",
1903 "palam", "clam", "cum", "ex", "e",
1904 "sine", "tenus", "pro", "prae",
1905 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1906 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1907 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1908 "murfl", "spoo", "breen", "flarn", "octothorpe",
1909 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1910 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1911 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1912 "wand", "ring", "amulet"
1916 #define NSTR lenof(strings)
1918 void findtest(void) {
1919 static const int rels
[] = {
1920 REL234_EQ
, REL234_GE
, REL234_LE
, REL234_LT
, REL234_GT
1922 static const char *const relnames
[] = {
1923 "EQ", "GE", "LE", "LT", "GT"
1925 int i
, j
, rel
, index
;
1926 char *p
, *ret
, *realret
, *realret2
;
1929 for (i
= 0; i
< (int)NSTR
; i
++) {
1931 for (j
= 0; j
< (int)(sizeof(rels
)/sizeof(*rels
)); j
++) {
1934 lo
= 0; hi
= arraylen
-1;
1936 mid
= (lo
+ hi
) / 2;
1937 c
= strcmp(p
, array
[mid
]);
1947 if (rel
== REL234_LT
)
1948 ret
= (mid
> 0 ? array
[--mid
] : NULL
);
1949 else if (rel
== REL234_GT
)
1950 ret
= (mid
< arraylen
-1 ? array
[++mid
] : NULL
);
1955 if (rel
== REL234_LT
|| rel
== REL234_LE
) {
1957 ret
= (hi
>= 0 ? array
[hi
] : NULL
);
1958 } else if (rel
== REL234_GT
|| rel
== REL234_GE
) {
1960 ret
= (lo
< arraylen ? array
[lo
] : NULL
);
1965 realret
= findrelpos234(tree
, p
, NULL
, rel
, &index
);
1966 if (realret
!= ret
) {
1967 error("find(\"%s\",%s) gave %s should be %s",
1968 p
, relnames
[j
], realret
, ret
);
1970 if (realret
&& index
!= mid
) {
1971 error("find(\"%s\",%s) gave %d should be %d",
1972 p
, relnames
[j
], index
, mid
);
1974 if (realret
&& rel
== REL234_EQ
) {
1975 realret2
= index234(tree
, index
);
1976 if (realret2
!= realret
) {
1977 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1978 p
, relnames
[j
], realret
, index
, index
, realret2
);
1982 printf("find(\"%s\",%s) gave %s(%d)\n", p
, relnames
[j
],
1988 realret
= findrelpos234(tree
, NULL
, NULL
, REL234_GT
, &index
);
1989 if (arraylen
&& (realret
!= array
[0] || index
!= 0)) {
1990 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1991 realret
, index
, array
[0]);
1992 } else if (!arraylen
&& (realret
!= NULL
)) {
1993 error("find(NULL,GT) gave %s(%d) should be NULL",
1997 realret
= findrelpos234(tree
, NULL
, NULL
, REL234_LT
, &index
);
1998 if (arraylen
&& (realret
!= array
[arraylen
-1] || index
!= arraylen
-1)) {
1999 error("find(NULL,LT) gave %s(%d) should be %s(0)",
2000 realret
, index
, array
[arraylen
-1]);
2001 } else if (!arraylen
&& (realret
!= NULL
)) {
2002 error("find(NULL,LT) gave %s(%d) should be NULL",
2007 void splittest(tree234
*tree
, void **array
, int arraylen
) {
2009 tree234
*tree3
, *tree4
;
2010 for (i
= 0; i
<= arraylen
; i
++) {
2011 tree3
= copytree234(tree
, NULL
, NULL
);
2012 tree4
= splitpos234(tree3
, i
, 0);
2013 verifytree(tree3
, array
, i
);
2014 verifytree(tree4
, array
+i
, arraylen
-i
);
2015 join234(tree3
, tree4
);
2016 freetree234(tree4
); /* left empty by join */
2017 verifytree(tree3
, array
, arraylen
);
2025 int tworoot
, tmplen
;
2027 tree234
*tree2
, *tree3
, *tree4
;
2030 setvbuf(stdout
, NULL
, _IOLBF
, 0);
2032 for (i
= 0; i
< (int)NSTR
; i
++) in
[i
] = 0;
2034 arraylen
= arraysize
= 0;
2035 tree
= newtree234(mycmp
);
2039 for (i
= 0; i
< 10000; i
++) {
2040 j
= randomnumber(&seed
);
2042 printf("trial: %d\n", i
);
2044 printf("deleting %s (%d)\n", strings
[j
], j
);
2045 deltest(strings
[j
]);
2048 printf("adding %s (%d)\n", strings
[j
], j
);
2049 addtest(strings
[j
]);
2056 while (arraylen
> 0) {
2057 j
= randomnumber(&seed
);
2065 * Now try an unsorted tree. We don't really need to test
2066 * delpos234 because we know del234 is based on it, so it's
2067 * already been tested in the above sorted-tree code; but for
2068 * completeness we'll use it to tear down our unsorted tree
2069 * once we've built it.
2071 tree
= newtree234(NULL
);
2074 for (i
= 0; i
< 1000; i
++) {
2075 printf("trial: %d\n", i
);
2076 j
= randomnumber(&seed
);
2078 k
= randomnumber(&seed
);
2079 k
%= count234(tree
)+1;
2080 printf("adding string %s at index %d\n", strings
[j
], k
);
2081 addpostest(strings
[j
], k
);
2085 * While we have this tree in its full form, we'll take a copy
2086 * of it to use in split and join testing.
2088 tree2
= copytree234(tree
, NULL
, NULL
);
2089 verifytree(tree2
, array
, arraylen
);/* check the copy is accurate */
2091 * Split tests. Split the tree at every possible point and
2092 * check the resulting subtrees.
2094 tworoot
= (!tree2
->root
->elems
[1]);/* see if it has a 2-root */
2095 splittest(tree2
, array
, arraylen
);
2097 * Now do the split test again, but on a tree that has a 2-root
2098 * (if the previous one didn't) or doesn't (if the previous one
2102 while ((!tree2
->root
->elems
[1]) == tworoot
) {
2103 delpos234(tree2
, --tmplen
);
2105 printf("now trying splits on second tree\n");
2106 splittest(tree2
, array
, tmplen
);
2110 * Back to the main testing of uncounted trees.
2112 while (count234(tree
) > 0) {
2113 printf("cleanup: tree size %d\n", count234(tree
));
2114 j
= randomnumber(&seed
);
2115 j
%= count234(tree
);
2116 printf("deleting string %s from index %d\n", (char *)array
[j
], j
);
2122 * Finally, do some testing on split/join on _sorted_ trees. At
2123 * the same time, we'll be testing split on very small trees.
2125 tree
= newtree234(mycmp
);
2128 for (i
= 0; i
< 17; i
++) {
2129 tree2
= copytree234(tree
, NULL
, NULL
);
2130 splittest(tree2
, array
, arraylen
);
2133 addtest(strings
[i
]);
2138 * Test silly cases of join: join(emptytree, emptytree), and
2139 * also ensure join correctly spots when sorted trees fail the
2140 * ordering constraint.
2142 tree
= newtree234(mycmp
);
2143 tree2
= newtree234(mycmp
);
2144 tree3
= newtree234(mycmp
);
2145 tree4
= newtree234(mycmp
);
2146 assert(mycmp(strings
[0], strings
[1]) < 0); /* just in case :-) */
2147 add234(tree2
, strings
[1]);
2148 add234(tree4
, strings
[0]);
2149 array
[0] = strings
[0];
2150 array
[1] = strings
[1];
2151 verifytree(tree
, array
, 0);
2152 verifytree(tree2
, array
+1, 1);
2153 verifytree(tree3
, array
, 0);
2154 verifytree(tree4
, array
, 1);
2158 * - join(tree,tree3) should leave both tree and tree3 unchanged.
2159 * - joinr(tree,tree2) should leave both tree and tree2 unchanged.
2160 * - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
2161 * - join(tree, tree2) should move the element from tree2 to tree.
2162 * - joinr(tree4, tree3) should move the element from tree4 to tree3.
2163 * - join(tree,tree3) should return NULL and leave both unchanged.
2164 * - join(tree3,tree) should work and create a bigger tree in tree3.
2166 assert(tree
== join234(tree
, tree3
));
2167 verifytree(tree
, array
, 0);
2168 verifytree(tree3
, array
, 0);
2169 assert(tree2
== join234r(tree
, tree2
));
2170 verifytree(tree
, array
, 0);
2171 verifytree(tree2
, array
+1, 1);
2172 assert(tree4
== join234(tree4
, tree3
));
2173 verifytree(tree3
, array
, 0);
2174 verifytree(tree4
, array
, 1);
2175 assert(tree
== join234(tree
, tree2
));
2176 verifytree(tree
, array
+1, 1);
2177 verifytree(tree2
, array
, 0);
2178 assert(tree3
== join234r(tree4
, tree3
));
2179 verifytree(tree3
, array
, 1);
2180 verifytree(tree4
, array
, 0);
2181 assert(NULL
== join234(tree
, tree3
));
2182 verifytree(tree
, array
+1, 1);
2183 verifytree(tree3
, array
, 1);
2184 assert(tree3
== join234(tree3
, tree
));
2185 verifytree(tree3
, array
, 2);
2186 verifytree(tree
, array
, 0);
2193 #if 0 /* sorted list of strings might be useful */
2195 "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",