720a8fb7 |
1 | /* |
2 | * tree234.c: reasonably generic counted 2-3-4 tree routines. |
3 | * |
4 | * This file is copyright 1999-2001 Simon Tatham. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person |
7 | * obtaining a copy of this software and associated documentation |
8 | * files (the "Software"), to deal in the Software without |
9 | * restriction, including without limitation the rights to use, |
10 | * copy, modify, merge, publish, distribute, sublicense, and/or |
11 | * sell copies of the Software, and to permit persons to whom the |
12 | * Software is furnished to do so, subject to the following |
13 | * conditions: |
14 | * |
15 | * The above copyright notice and this permission notice shall be |
16 | * included in all copies or substantial portions of the Software. |
17 | * |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR |
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
23 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
25 | * SOFTWARE. |
26 | */ |
27 | |
28 | #include <stdio.h> |
29 | #include <stdlib.h> |
30 | #include <assert.h> |
31 | |
32 | #include "tree234.h" |
33 | |
24055572 |
34 | #include "puzzles.h" /* for smalloc/sfree */ |
720a8fb7 |
35 | |
36 | #ifdef TEST |
37 | #define LOG(x) (printf x) |
7f82318f |
38 | #define smalloc malloc |
39 | #define srealloc realloc |
40 | #define sfree free |
720a8fb7 |
41 | #else |
42 | #define LOG(x) |
43 | #endif |
44 | |
45 | typedef struct node234_Tag node234; |
46 | |
47 | struct tree234_Tag { |
48 | node234 *root; |
49 | cmpfn234 cmp; |
50 | }; |
51 | |
52 | struct node234_Tag { |
53 | node234 *parent; |
54 | node234 *kids[4]; |
55 | int counts[4]; |
56 | void *elems[3]; |
57 | }; |
58 | |
59 | /* |
60 | * Create a 2-3-4 tree. |
61 | */ |
62 | tree234 *newtree234(cmpfn234 cmp) { |
24055572 |
63 | tree234 *ret = snew(tree234); |
720a8fb7 |
64 | LOG(("created tree %p\n", ret)); |
65 | ret->root = NULL; |
66 | ret->cmp = cmp; |
67 | return ret; |
68 | } |
69 | |
70 | /* |
71 | * Free a 2-3-4 tree (not including freeing the elements). |
72 | */ |
73 | static void freenode234(node234 *n) { |
74 | if (!n) |
75 | return; |
76 | freenode234(n->kids[0]); |
77 | freenode234(n->kids[1]); |
78 | freenode234(n->kids[2]); |
79 | freenode234(n->kids[3]); |
80 | sfree(n); |
81 | } |
82 | void freetree234(tree234 *t) { |
83 | freenode234(t->root); |
84 | sfree(t); |
85 | } |
86 | |
87 | /* |
88 | * Internal function to count a node. |
89 | */ |
90 | static int countnode234(node234 *n) { |
91 | int count = 0; |
92 | int i; |
93 | if (!n) |
94 | return 0; |
95 | for (i = 0; i < 4; i++) |
96 | count += n->counts[i]; |
97 | for (i = 0; i < 3; i++) |
98 | if (n->elems[i]) |
99 | count++; |
100 | return count; |
101 | } |
102 | |
103 | /* |
104 | * Count the elements in a tree. |
105 | */ |
106 | int count234(tree234 *t) { |
107 | if (t->root) |
108 | return countnode234(t->root); |
109 | else |
110 | return 0; |
111 | } |
112 | |
113 | /* |
114 | * Propagate a node overflow up a tree until it stops. Returns 0 or |
115 | * 1, depending on whether the root had to be split or not. |
116 | */ |
117 | static int add234_insert(node234 *left, void *e, node234 *right, |
118 | node234 **root, node234 *n, int ki) { |
119 | int lcount, rcount; |
120 | /* |
121 | * We need to insert the new left/element/right set in n at |
122 | * child position ki. |
123 | */ |
124 | lcount = countnode234(left); |
125 | rcount = countnode234(right); |
126 | while (n) { |
127 | LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
128 | n, |
129 | n->kids[0], n->counts[0], n->elems[0], |
130 | n->kids[1], n->counts[1], n->elems[1], |
131 | n->kids[2], n->counts[2], n->elems[2], |
132 | n->kids[3], n->counts[3])); |
133 | LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n", |
134 | left, lcount, e, right, rcount, ki)); |
135 | if (n->elems[1] == NULL) { |
136 | /* |
137 | * Insert in a 2-node; simple. |
138 | */ |
139 | if (ki == 0) { |
140 | LOG((" inserting on left of 2-node\n")); |
141 | n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1]; |
142 | n->elems[1] = n->elems[0]; |
143 | n->kids[1] = right; n->counts[1] = rcount; |
144 | n->elems[0] = e; |
145 | n->kids[0] = left; n->counts[0] = lcount; |
146 | } else { /* ki == 1 */ |
147 | LOG((" inserting on right of 2-node\n")); |
148 | n->kids[2] = right; n->counts[2] = rcount; |
149 | n->elems[1] = e; |
150 | n->kids[1] = left; n->counts[1] = lcount; |
151 | } |
152 | if (n->kids[0]) n->kids[0]->parent = n; |
153 | if (n->kids[1]) n->kids[1]->parent = n; |
154 | if (n->kids[2]) n->kids[2]->parent = n; |
155 | LOG((" done\n")); |
156 | break; |
157 | } else if (n->elems[2] == NULL) { |
158 | /* |
159 | * Insert in a 3-node; simple. |
160 | */ |
161 | if (ki == 0) { |
162 | LOG((" inserting on left of 3-node\n")); |
163 | n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2]; |
164 | n->elems[2] = n->elems[1]; |
165 | n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1]; |
166 | n->elems[1] = n->elems[0]; |
167 | n->kids[1] = right; n->counts[1] = rcount; |
168 | n->elems[0] = e; |
169 | n->kids[0] = left; n->counts[0] = lcount; |
170 | } else if (ki == 1) { |
171 | LOG((" inserting in middle of 3-node\n")); |
172 | n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2]; |
173 | n->elems[2] = n->elems[1]; |
174 | n->kids[2] = right; n->counts[2] = rcount; |
175 | n->elems[1] = e; |
176 | n->kids[1] = left; n->counts[1] = lcount; |
177 | } else { /* ki == 2 */ |
178 | LOG((" inserting on right of 3-node\n")); |
179 | n->kids[3] = right; n->counts[3] = rcount; |
180 | n->elems[2] = e; |
181 | n->kids[2] = left; n->counts[2] = lcount; |
182 | } |
183 | if (n->kids[0]) n->kids[0]->parent = n; |
184 | if (n->kids[1]) n->kids[1]->parent = n; |
185 | if (n->kids[2]) n->kids[2]->parent = n; |
186 | if (n->kids[3]) n->kids[3]->parent = n; |
187 | LOG((" done\n")); |
188 | break; |
189 | } else { |
24055572 |
190 | node234 *m = snew(node234); |
720a8fb7 |
191 | m->parent = n->parent; |
192 | LOG((" splitting a 4-node; created new node %p\n", m)); |
193 | /* |
194 | * Insert in a 4-node; split into a 2-node and a |
195 | * 3-node, and move focus up a level. |
196 | * |
197 | * I don't think it matters which way round we put the |
198 | * 2 and the 3. For simplicity, we'll put the 3 first |
199 | * always. |
200 | */ |
201 | if (ki == 0) { |
202 | m->kids[0] = left; m->counts[0] = lcount; |
203 | m->elems[0] = e; |
204 | m->kids[1] = right; m->counts[1] = rcount; |
205 | m->elems[1] = n->elems[0]; |
206 | m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1]; |
207 | e = n->elems[1]; |
208 | n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2]; |
209 | n->elems[0] = n->elems[2]; |
210 | n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; |
211 | } else if (ki == 1) { |
212 | m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; |
213 | m->elems[0] = n->elems[0]; |
214 | m->kids[1] = left; m->counts[1] = lcount; |
215 | m->elems[1] = e; |
216 | m->kids[2] = right; m->counts[2] = rcount; |
217 | e = n->elems[1]; |
218 | n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2]; |
219 | n->elems[0] = n->elems[2]; |
220 | n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; |
221 | } else if (ki == 2) { |
222 | m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; |
223 | m->elems[0] = n->elems[0]; |
224 | m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1]; |
225 | m->elems[1] = n->elems[1]; |
226 | m->kids[2] = left; m->counts[2] = lcount; |
227 | /* e = e; */ |
228 | n->kids[0] = right; n->counts[0] = rcount; |
229 | n->elems[0] = n->elems[2]; |
230 | n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; |
231 | } else { /* ki == 3 */ |
232 | m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; |
233 | m->elems[0] = n->elems[0]; |
234 | m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1]; |
235 | m->elems[1] = n->elems[1]; |
236 | m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2]; |
237 | n->kids[0] = left; n->counts[0] = lcount; |
238 | n->elems[0] = e; |
239 | n->kids[1] = right; n->counts[1] = rcount; |
240 | e = n->elems[2]; |
241 | } |
242 | m->kids[3] = n->kids[3] = n->kids[2] = NULL; |
243 | m->counts[3] = n->counts[3] = n->counts[2] = 0; |
244 | m->elems[2] = n->elems[2] = n->elems[1] = NULL; |
245 | if (m->kids[0]) m->kids[0]->parent = m; |
246 | if (m->kids[1]) m->kids[1]->parent = m; |
247 | if (m->kids[2]) m->kids[2]->parent = m; |
248 | if (n->kids[0]) n->kids[0]->parent = n; |
249 | if (n->kids[1]) n->kids[1]->parent = n; |
250 | LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m, |
251 | m->kids[0], m->counts[0], m->elems[0], |
252 | m->kids[1], m->counts[1], m->elems[1], |
253 | m->kids[2], m->counts[2])); |
254 | LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n, |
255 | n->kids[0], n->counts[0], n->elems[0], |
256 | n->kids[1], n->counts[1])); |
257 | left = m; lcount = countnode234(left); |
258 | right = n; rcount = countnode234(right); |
259 | } |
260 | if (n->parent) |
261 | ki = (n->parent->kids[0] == n ? 0 : |
262 | n->parent->kids[1] == n ? 1 : |
263 | n->parent->kids[2] == n ? 2 : 3); |
264 | n = n->parent; |
265 | } |
266 | |
267 | /* |
268 | * If we've come out of here by `break', n will still be |
269 | * non-NULL and all we need to do is go back up the tree |
270 | * updating counts. If we've come here because n is NULL, we |
271 | * need to create a new root for the tree because the old one |
272 | * has just split into two. */ |
273 | if (n) { |
274 | while (n->parent) { |
275 | int count = countnode234(n); |
276 | int childnum; |
277 | childnum = (n->parent->kids[0] == n ? 0 : |
278 | n->parent->kids[1] == n ? 1 : |
279 | n->parent->kids[2] == n ? 2 : 3); |
280 | n->parent->counts[childnum] = count; |
281 | n = n->parent; |
282 | } |
283 | return 0; /* root unchanged */ |
284 | } else { |
285 | LOG((" root is overloaded, split into two\n")); |
24055572 |
286 | (*root) = snew(node234); |
720a8fb7 |
287 | (*root)->kids[0] = left; (*root)->counts[0] = lcount; |
288 | (*root)->elems[0] = e; |
289 | (*root)->kids[1] = right; (*root)->counts[1] = rcount; |
290 | (*root)->elems[1] = NULL; |
291 | (*root)->kids[2] = NULL; (*root)->counts[2] = 0; |
292 | (*root)->elems[2] = NULL; |
293 | (*root)->kids[3] = NULL; (*root)->counts[3] = 0; |
294 | (*root)->parent = NULL; |
295 | if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root); |
296 | if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root); |
297 | LOG((" new root is %p/%d \"%s\" %p/%d\n", |
298 | (*root)->kids[0], (*root)->counts[0], |
299 | (*root)->elems[0], |
300 | (*root)->kids[1], (*root)->counts[1])); |
301 | return 1; /* root moved */ |
302 | } |
303 | } |
304 | |
305 | /* |
306 | * Add an element e to a 2-3-4 tree t. Returns e on success, or if |
307 | * an existing element compares equal, returns that. |
308 | */ |
309 | static void *add234_internal(tree234 *t, void *e, int index) { |
310 | node234 *n; |
311 | int ki; |
312 | void *orig_e = e; |
313 | int c; |
314 | |
315 | LOG(("adding element \"%s\" to tree %p\n", e, t)); |
316 | if (t->root == NULL) { |
24055572 |
317 | t->root = snew(node234); |
720a8fb7 |
318 | t->root->elems[1] = t->root->elems[2] = NULL; |
319 | t->root->kids[0] = t->root->kids[1] = NULL; |
320 | t->root->kids[2] = t->root->kids[3] = NULL; |
321 | t->root->counts[0] = t->root->counts[1] = 0; |
322 | t->root->counts[2] = t->root->counts[3] = 0; |
323 | t->root->parent = NULL; |
324 | t->root->elems[0] = e; |
325 | LOG((" created root %p\n", t->root)); |
326 | return orig_e; |
327 | } |
328 | |
329 | n = t->root; |
330 | while (n) { |
331 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
332 | n, |
333 | n->kids[0], n->counts[0], n->elems[0], |
334 | n->kids[1], n->counts[1], n->elems[1], |
335 | n->kids[2], n->counts[2], n->elems[2], |
336 | n->kids[3], n->counts[3])); |
337 | if (index >= 0) { |
338 | if (!n->kids[0]) { |
339 | /* |
340 | * Leaf node. We want to insert at kid position |
341 | * equal to the index: |
342 | * |
343 | * 0 A 1 B 2 C 3 |
344 | */ |
345 | ki = index; |
346 | } else { |
347 | /* |
348 | * Internal node. We always descend through it (add |
349 | * always starts at the bottom, never in the |
350 | * middle). |
351 | */ |
352 | if (index <= n->counts[0]) { |
353 | ki = 0; |
354 | } else if (index -= n->counts[0] + 1, index <= n->counts[1]) { |
355 | ki = 1; |
356 | } else if (index -= n->counts[1] + 1, index <= n->counts[2]) { |
357 | ki = 2; |
358 | } else if (index -= n->counts[2] + 1, index <= n->counts[3]) { |
359 | ki = 3; |
360 | } else |
361 | return NULL; /* error: index out of range */ |
362 | } |
363 | } else { |
364 | if ((c = t->cmp(e, n->elems[0])) < 0) |
365 | ki = 0; |
366 | else if (c == 0) |
367 | return n->elems[0]; /* already exists */ |
368 | else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0) |
369 | ki = 1; |
370 | else if (c == 0) |
371 | return n->elems[1]; /* already exists */ |
372 | else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0) |
373 | ki = 2; |
374 | else if (c == 0) |
375 | return n->elems[2]; /* already exists */ |
376 | else |
377 | ki = 3; |
378 | } |
379 | LOG((" moving to child %d (%p)\n", ki, n->kids[ki])); |
380 | if (!n->kids[ki]) |
381 | break; |
382 | n = n->kids[ki]; |
383 | } |
384 | |
385 | add234_insert(NULL, e, NULL, &t->root, n, ki); |
386 | |
387 | return orig_e; |
388 | } |
389 | |
390 | void *add234(tree234 *t, void *e) { |
391 | if (!t->cmp) /* tree is unsorted */ |
392 | return NULL; |
393 | |
394 | return add234_internal(t, e, -1); |
395 | } |
396 | void *addpos234(tree234 *t, void *e, int index) { |
397 | if (index < 0 || /* index out of range */ |
398 | t->cmp) /* tree is sorted */ |
399 | return NULL; /* return failure */ |
400 | |
401 | return add234_internal(t, e, index); /* this checks the upper bound */ |
402 | } |
403 | |
404 | /* |
405 | * Look up the element at a given numeric index in a 2-3-4 tree. |
406 | * Returns NULL if the index is out of range. |
407 | */ |
408 | void *index234(tree234 *t, int index) { |
409 | node234 *n; |
410 | |
411 | if (!t->root) |
412 | return NULL; /* tree is empty */ |
413 | |
414 | if (index < 0 || index >= countnode234(t->root)) |
415 | return NULL; /* out of range */ |
416 | |
417 | n = t->root; |
418 | |
419 | while (n) { |
420 | if (index < n->counts[0]) |
421 | n = n->kids[0]; |
422 | else if (index -= n->counts[0] + 1, index < 0) |
423 | return n->elems[0]; |
424 | else if (index < n->counts[1]) |
425 | n = n->kids[1]; |
426 | else if (index -= n->counts[1] + 1, index < 0) |
427 | return n->elems[1]; |
428 | else if (index < n->counts[2]) |
429 | n = n->kids[2]; |
430 | else if (index -= n->counts[2] + 1, index < 0) |
431 | return n->elems[2]; |
432 | else |
433 | n = n->kids[3]; |
434 | } |
435 | |
436 | /* We shouldn't ever get here. I wonder how we did. */ |
437 | return NULL; |
438 | } |
439 | |
440 | /* |
441 | * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not |
442 | * found. e is always passed as the first argument to cmp, so cmp |
443 | * can be an asymmetric function if desired. cmp can also be passed |
444 | * as NULL, in which case the compare function from the tree proper |
445 | * will be used. |
446 | */ |
447 | void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, |
448 | int relation, int *index) { |
449 | node234 *n; |
450 | void *ret; |
451 | int c; |
452 | int idx, ecount, kcount, cmpret; |
453 | |
454 | if (t->root == NULL) |
455 | return NULL; |
456 | |
457 | if (cmp == NULL) |
458 | cmp = t->cmp; |
459 | |
460 | n = t->root; |
461 | /* |
462 | * Attempt to find the element itself. |
463 | */ |
464 | idx = 0; |
465 | ecount = -1; |
466 | /* |
467 | * Prepare a fake `cmp' result if e is NULL. |
468 | */ |
469 | cmpret = 0; |
470 | if (e == NULL) { |
471 | assert(relation == REL234_LT || relation == REL234_GT); |
472 | if (relation == REL234_LT) |
473 | cmpret = +1; /* e is a max: always greater */ |
474 | else if (relation == REL234_GT) |
475 | cmpret = -1; /* e is a min: always smaller */ |
476 | } |
477 | while (1) { |
478 | for (kcount = 0; kcount < 4; kcount++) { |
479 | if (kcount >= 3 || n->elems[kcount] == NULL || |
480 | (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) { |
481 | break; |
482 | } |
483 | if (n->kids[kcount]) idx += n->counts[kcount]; |
484 | if (c == 0) { |
485 | ecount = kcount; |
486 | break; |
487 | } |
488 | idx++; |
489 | } |
490 | if (ecount >= 0) |
491 | break; |
492 | if (n->kids[kcount]) |
493 | n = n->kids[kcount]; |
494 | else |
495 | break; |
496 | } |
497 | |
498 | if (ecount >= 0) { |
499 | /* |
500 | * We have found the element we're looking for. It's |
501 | * n->elems[ecount], at tree index idx. If our search |
502 | * relation is EQ, LE or GE we can now go home. |
503 | */ |
504 | if (relation != REL234_LT && relation != REL234_GT) { |
505 | if (index) *index = idx; |
506 | return n->elems[ecount]; |
507 | } |
508 | |
509 | /* |
510 | * Otherwise, we'll do an indexed lookup for the previous |
511 | * or next element. (It would be perfectly possible to |
512 | * implement these search types in a non-counted tree by |
513 | * going back up from where we are, but far more fiddly.) |
514 | */ |
515 | if (relation == REL234_LT) |
516 | idx--; |
517 | else |
518 | idx++; |
519 | } else { |
520 | /* |
521 | * We've found our way to the bottom of the tree and we |
522 | * know where we would insert this node if we wanted to: |
523 | * we'd put it in in place of the (empty) subtree |
524 | * n->kids[kcount], and it would have index idx |
525 | * |
526 | * But the actual element isn't there. So if our search |
527 | * relation is EQ, we're doomed. |
528 | */ |
529 | if (relation == REL234_EQ) |
530 | return NULL; |
531 | |
532 | /* |
533 | * Otherwise, we must do an index lookup for index idx-1 |
534 | * (if we're going left - LE or LT) or index idx (if we're |
535 | * going right - GE or GT). |
536 | */ |
537 | if (relation == REL234_LT || relation == REL234_LE) { |
538 | idx--; |
539 | } |
540 | } |
541 | |
542 | /* |
543 | * We know the index of the element we want; just call index234 |
544 | * to do the rest. This will return NULL if the index is out of |
545 | * bounds, which is exactly what we want. |
546 | */ |
547 | ret = index234(t, idx); |
548 | if (ret && index) *index = idx; |
549 | return ret; |
550 | } |
551 | void *find234(tree234 *t, void *e, cmpfn234 cmp) { |
552 | return findrelpos234(t, e, cmp, REL234_EQ, NULL); |
553 | } |
554 | void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) { |
555 | return findrelpos234(t, e, cmp, relation, NULL); |
556 | } |
557 | void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) { |
558 | return findrelpos234(t, e, cmp, REL234_EQ, index); |
559 | } |
560 | |
561 | /* |
562 | * Tree transformation used in delete and split: move a subtree |
563 | * right, from child ki of a node to the next child. Update k and |
564 | * index so that they still point to the same place in the |
565 | * transformed tree. Assumes the destination child is not full, and |
566 | * that the source child does have a subtree to spare. Can cope if |
567 | * the destination child is undersized. |
568 | * |
569 | * . C . . B . |
570 | * / \ -> / \ |
571 | * [more] a A b B c d D e [more] a A b c C d D e |
572 | * |
573 | * . C . . B . |
574 | * / \ -> / \ |
575 | * [more] a A b B c d [more] a A b c C d |
576 | */ |
577 | static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) { |
578 | node234 *src, *dest; |
579 | int i, srclen, adjust; |
580 | |
581 | src = n->kids[ki]; |
582 | dest = n->kids[ki+1]; |
583 | |
584 | LOG((" trans234_subtree_right(%p, %d):\n", n, ki)); |
585 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
586 | n, |
587 | n->kids[0], n->counts[0], n->elems[0], |
588 | n->kids[1], n->counts[1], n->elems[1], |
589 | n->kids[2], n->counts[2], n->elems[2], |
590 | n->kids[3], n->counts[3])); |
591 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
592 | src, |
593 | src->kids[0], src->counts[0], src->elems[0], |
594 | src->kids[1], src->counts[1], src->elems[1], |
595 | src->kids[2], src->counts[2], src->elems[2], |
596 | src->kids[3], src->counts[3])); |
597 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
598 | dest, |
599 | dest->kids[0], dest->counts[0], dest->elems[0], |
600 | dest->kids[1], dest->counts[1], dest->elems[1], |
601 | dest->kids[2], dest->counts[2], dest->elems[2], |
602 | dest->kids[3], dest->counts[3])); |
603 | /* |
604 | * Move over the rest of the destination node to make space. |
605 | */ |
606 | dest->kids[3] = dest->kids[2]; dest->counts[3] = dest->counts[2]; |
607 | dest->elems[2] = dest->elems[1]; |
608 | dest->kids[2] = dest->kids[1]; dest->counts[2] = dest->counts[1]; |
609 | dest->elems[1] = dest->elems[0]; |
610 | dest->kids[1] = dest->kids[0]; dest->counts[1] = dest->counts[0]; |
611 | |
612 | /* which element to move over */ |
613 | i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0); |
614 | |
615 | dest->elems[0] = n->elems[ki]; |
616 | n->elems[ki] = src->elems[i]; |
617 | src->elems[i] = NULL; |
618 | |
619 | dest->kids[0] = src->kids[i+1]; dest->counts[0] = src->counts[i+1]; |
620 | src->kids[i+1] = NULL; src->counts[i+1] = 0; |
621 | |
622 | if (dest->kids[0]) dest->kids[0]->parent = dest; |
623 | |
624 | adjust = dest->counts[0] + 1; |
625 | |
626 | n->counts[ki] -= adjust; |
627 | n->counts[ki+1] += adjust; |
628 | |
629 | srclen = n->counts[ki]; |
630 | |
631 | if (k) { |
632 | LOG((" before: k,index = %d,%d\n", (*k), (*index))); |
633 | if ((*k) == ki && (*index) > srclen) { |
634 | (*index) -= srclen + 1; |
635 | (*k)++; |
636 | } else if ((*k) == ki+1) { |
637 | (*index) += adjust; |
638 | } |
639 | LOG((" after: k,index = %d,%d\n", (*k), (*index))); |
640 | } |
641 | |
642 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
643 | n, |
644 | n->kids[0], n->counts[0], n->elems[0], |
645 | n->kids[1], n->counts[1], n->elems[1], |
646 | n->kids[2], n->counts[2], n->elems[2], |
647 | n->kids[3], n->counts[3])); |
648 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
649 | src, |
650 | src->kids[0], src->counts[0], src->elems[0], |
651 | src->kids[1], src->counts[1], src->elems[1], |
652 | src->kids[2], src->counts[2], src->elems[2], |
653 | src->kids[3], src->counts[3])); |
654 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
655 | dest, |
656 | dest->kids[0], dest->counts[0], dest->elems[0], |
657 | dest->kids[1], dest->counts[1], dest->elems[1], |
658 | dest->kids[2], dest->counts[2], dest->elems[2], |
659 | dest->kids[3], dest->counts[3])); |
660 | } |
661 | |
662 | /* |
663 | * Tree transformation used in delete and split: move a subtree |
664 | * left, from child ki of a node to the previous child. Update k |
665 | * and index so that they still point to the same place in the |
666 | * transformed tree. Assumes the destination child is not full, and |
667 | * that the source child does have a subtree to spare. Can cope if |
668 | * the destination child is undersized. |
669 | * |
670 | * . B . . C . |
671 | * / \ -> / \ |
672 | * a A b c C d D e [more] a A b B c d D e [more] |
673 | * |
674 | * . A . . B . |
675 | * / \ -> / \ |
676 | * a b B c C d [more] a A b c C d [more] |
677 | */ |
678 | static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) { |
679 | node234 *src, *dest; |
680 | int i, adjust; |
681 | |
682 | src = n->kids[ki]; |
683 | dest = n->kids[ki-1]; |
684 | |
685 | LOG((" trans234_subtree_left(%p, %d):\n", n, ki)); |
686 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
687 | n, |
688 | n->kids[0], n->counts[0], n->elems[0], |
689 | n->kids[1], n->counts[1], n->elems[1], |
690 | n->kids[2], n->counts[2], n->elems[2], |
691 | n->kids[3], n->counts[3])); |
692 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
693 | dest, |
694 | dest->kids[0], dest->counts[0], dest->elems[0], |
695 | dest->kids[1], dest->counts[1], dest->elems[1], |
696 | dest->kids[2], dest->counts[2], dest->elems[2], |
697 | dest->kids[3], dest->counts[3])); |
698 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
699 | src, |
700 | src->kids[0], src->counts[0], src->elems[0], |
701 | src->kids[1], src->counts[1], src->elems[1], |
702 | src->kids[2], src->counts[2], src->elems[2], |
703 | src->kids[3], src->counts[3])); |
704 | |
705 | /* where in dest to put it */ |
706 | i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0); |
707 | dest->elems[i] = n->elems[ki-1]; |
708 | n->elems[ki-1] = src->elems[0]; |
709 | |
710 | dest->kids[i+1] = src->kids[0]; dest->counts[i+1] = src->counts[0]; |
711 | |
712 | if (dest->kids[i+1]) dest->kids[i+1]->parent = dest; |
713 | |
714 | /* |
715 | * Move over the rest of the source node. |
716 | */ |
717 | src->kids[0] = src->kids[1]; src->counts[0] = src->counts[1]; |
718 | src->elems[0] = src->elems[1]; |
719 | src->kids[1] = src->kids[2]; src->counts[1] = src->counts[2]; |
720 | src->elems[1] = src->elems[2]; |
721 | src->kids[2] = src->kids[3]; src->counts[2] = src->counts[3]; |
722 | src->elems[2] = NULL; |
723 | src->kids[3] = NULL; src->counts[3] = 0; |
724 | |
725 | adjust = dest->counts[i+1] + 1; |
726 | |
727 | n->counts[ki] -= adjust; |
728 | n->counts[ki-1] += adjust; |
729 | |
730 | if (k) { |
731 | LOG((" before: k,index = %d,%d\n", (*k), (*index))); |
732 | if ((*k) == ki) { |
733 | (*index) -= adjust; |
734 | if ((*index) < 0) { |
735 | (*index) += n->counts[ki-1] + 1; |
736 | (*k)--; |
737 | } |
738 | } |
739 | LOG((" after: k,index = %d,%d\n", (*k), (*index))); |
740 | } |
741 | |
742 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
743 | n, |
744 | n->kids[0], n->counts[0], n->elems[0], |
745 | n->kids[1], n->counts[1], n->elems[1], |
746 | n->kids[2], n->counts[2], n->elems[2], |
747 | n->kids[3], n->counts[3])); |
748 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
749 | dest, |
750 | dest->kids[0], dest->counts[0], dest->elems[0], |
751 | dest->kids[1], dest->counts[1], dest->elems[1], |
752 | dest->kids[2], dest->counts[2], dest->elems[2], |
753 | dest->kids[3], dest->counts[3])); |
754 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
755 | src, |
756 | src->kids[0], src->counts[0], src->elems[0], |
757 | src->kids[1], src->counts[1], src->elems[1], |
758 | src->kids[2], src->counts[2], src->elems[2], |
759 | src->kids[3], src->counts[3])); |
760 | } |
761 | |
762 | /* |
763 | * Tree transformation used in delete and split: merge child nodes |
764 | * ki and ki+1 of a node. Update k and index so that they still |
765 | * point to the same place in the transformed tree. Assumes both |
766 | * children _are_ sufficiently small. |
767 | * |
768 | * . B . . |
769 | * / \ -> | |
770 | * a A b c C d a A b B c C d |
771 | * |
772 | * This routine can also cope with either child being undersized: |
773 | * |
774 | * . A . . |
775 | * / \ -> | |
776 | * a b B c a A b B c |
777 | * |
778 | * . A . . |
779 | * / \ -> | |
780 | * a b B c C d a A b B c C d |
781 | */ |
782 | static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) { |
783 | node234 *left, *right; |
784 | int i, leftlen, rightlen, lsize, rsize; |
785 | |
786 | left = n->kids[ki]; leftlen = n->counts[ki]; |
787 | right = n->kids[ki+1]; rightlen = n->counts[ki+1]; |
788 | |
789 | LOG((" trans234_subtree_merge(%p, %d):\n", n, ki)); |
790 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
791 | n, |
792 | n->kids[0], n->counts[0], n->elems[0], |
793 | n->kids[1], n->counts[1], n->elems[1], |
794 | n->kids[2], n->counts[2], n->elems[2], |
795 | n->kids[3], n->counts[3])); |
796 | LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
797 | left, |
798 | left->kids[0], left->counts[0], left->elems[0], |
799 | left->kids[1], left->counts[1], left->elems[1], |
800 | left->kids[2], left->counts[2], left->elems[2], |
801 | left->kids[3], left->counts[3])); |
802 | LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
803 | right, |
804 | right->kids[0], right->counts[0], right->elems[0], |
805 | right->kids[1], right->counts[1], right->elems[1], |
806 | right->kids[2], right->counts[2], right->elems[2], |
807 | right->kids[3], right->counts[3])); |
808 | |
809 | assert(!left->elems[2] && !right->elems[2]); /* neither is large! */ |
810 | lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0); |
811 | rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0); |
812 | |
813 | left->elems[lsize] = n->elems[ki]; |
814 | |
815 | for (i = 0; i < rsize+1; i++) { |
816 | left->kids[lsize+1+i] = right->kids[i]; |
817 | left->counts[lsize+1+i] = right->counts[i]; |
818 | if (left->kids[lsize+1+i]) |
819 | left->kids[lsize+1+i]->parent = left; |
820 | if (i < rsize) |
821 | left->elems[lsize+1+i] = right->elems[i]; |
822 | } |
823 | |
824 | n->counts[ki] += rightlen + 1; |
825 | |
826 | sfree(right); |
827 | |
828 | /* |
829 | * Move the rest of n up by one. |
830 | */ |
831 | for (i = ki+1; i < 3; i++) { |
832 | n->kids[i] = n->kids[i+1]; |
833 | n->counts[i] = n->counts[i+1]; |
834 | } |
835 | for (i = ki; i < 2; i++) { |
836 | n->elems[i] = n->elems[i+1]; |
837 | } |
838 | n->kids[3] = NULL; |
839 | n->counts[3] = 0; |
840 | n->elems[2] = NULL; |
841 | |
842 | if (k) { |
843 | LOG((" before: k,index = %d,%d\n", (*k), (*index))); |
844 | if ((*k) == ki+1) { |
845 | (*k)--; |
846 | (*index) += leftlen + 1; |
847 | } else if ((*k) > ki+1) { |
848 | (*k)--; |
849 | } |
850 | LOG((" after: k,index = %d,%d\n", (*k), (*index))); |
851 | } |
852 | |
853 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
854 | n, |
855 | n->kids[0], n->counts[0], n->elems[0], |
856 | n->kids[1], n->counts[1], n->elems[1], |
857 | n->kids[2], n->counts[2], n->elems[2], |
858 | n->kids[3], n->counts[3])); |
859 | LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
860 | left, |
861 | left->kids[0], left->counts[0], left->elems[0], |
862 | left->kids[1], left->counts[1], left->elems[1], |
863 | left->kids[2], left->counts[2], left->elems[2], |
864 | left->kids[3], left->counts[3])); |
865 | |
866 | } |
867 | |
868 | /* |
869 | * Delete an element e in a 2-3-4 tree. Does not free the element, |
870 | * merely removes all links to it from the tree nodes. |
871 | */ |
872 | static void *delpos234_internal(tree234 *t, int index) { |
873 | node234 *n; |
874 | void *retval; |
875 | int ki, i; |
876 | |
877 | retval = NULL; |
878 | |
879 | n = t->root; /* by assumption this is non-NULL */ |
880 | LOG(("deleting item %d from tree %p\n", index, t)); |
881 | while (1) { |
882 | node234 *sub; |
883 | |
884 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", |
885 | n, |
886 | n->kids[0], n->counts[0], n->elems[0], |
887 | n->kids[1], n->counts[1], n->elems[1], |
888 | n->kids[2], n->counts[2], n->elems[2], |
889 | n->kids[3], n->counts[3], |
890 | index)); |
891 | if (index <= n->counts[0]) { |
892 | ki = 0; |
893 | } else if (index -= n->counts[0]+1, index <= n->counts[1]) { |
894 | ki = 1; |
895 | } else if (index -= n->counts[1]+1, index <= n->counts[2]) { |
896 | ki = 2; |
897 | } else if (index -= n->counts[2]+1, index <= n->counts[3]) { |
898 | ki = 3; |
899 | } else { |
900 | assert(0); /* can't happen */ |
901 | } |
902 | |
903 | if (!n->kids[0]) |
904 | break; /* n is a leaf node; we're here! */ |
905 | |
906 | /* |
907 | * Check to see if we've found our target element. If so, |
908 | * we must choose a new target (we'll use the old target's |
909 | * successor, which will be in a leaf), move it into the |
910 | * place of the old one, continue down to the leaf and |
911 | * delete the old copy of the new target. |
912 | */ |
913 | if (index == n->counts[ki]) { |
914 | node234 *m; |
915 | LOG((" found element in internal node, index %d\n", ki)); |
916 | assert(n->elems[ki]); /* must be a kid _before_ an element */ |
917 | ki++; index = 0; |
918 | for (m = n->kids[ki]; m->kids[0]; m = m->kids[0]) |
919 | continue; |
920 | LOG((" replacing with element \"%s\" from leaf node %p\n", |
921 | m->elems[0], m)); |
922 | retval = n->elems[ki-1]; |
923 | n->elems[ki-1] = m->elems[0]; |
924 | } |
925 | |
926 | /* |
927 | * Recurse down to subtree ki. If it has only one element, |
928 | * we have to do some transformation to start with. |
929 | */ |
930 | LOG((" moving to subtree %d\n", ki)); |
931 | sub = n->kids[ki]; |
932 | if (!sub->elems[1]) { |
933 | LOG((" subtree has only one element!\n")); |
934 | if (ki > 0 && n->kids[ki-1]->elems[1]) { |
935 | /* |
936 | * Child ki has only one element, but child |
937 | * ki-1 has two or more. So we need to move a |
938 | * subtree from ki-1 to ki. |
939 | */ |
940 | trans234_subtree_right(n, ki-1, &ki, &index); |
941 | } else if (ki < 3 && n->kids[ki+1] && |
942 | n->kids[ki+1]->elems[1]) { |
943 | /* |
944 | * Child ki has only one element, but ki+1 has |
945 | * two or more. Move a subtree from ki+1 to ki. |
946 | */ |
947 | trans234_subtree_left(n, ki+1, &ki, &index); |
948 | } else { |
949 | /* |
950 | * ki is small with only small neighbours. Pick a |
951 | * neighbour and merge with it. |
952 | */ |
953 | trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index); |
954 | sub = n->kids[ki]; |
955 | |
956 | if (!n->elems[0]) { |
957 | /* |
958 | * The root is empty and needs to be |
959 | * removed. |
960 | */ |
961 | LOG((" shifting root!\n")); |
962 | t->root = sub; |
963 | sub->parent = NULL; |
964 | sfree(n); |
965 | n = NULL; |
966 | } |
967 | } |
968 | } |
969 | |
970 | if (n) |
971 | n->counts[ki]--; |
972 | n = sub; |
973 | } |
974 | |
975 | /* |
976 | * Now n is a leaf node, and ki marks the element number we |
977 | * want to delete. We've already arranged for the leaf to be |
978 | * bigger than minimum size, so let's just go to it. |
979 | */ |
980 | assert(!n->kids[0]); |
981 | if (!retval) |
982 | retval = n->elems[ki]; |
983 | |
984 | for (i = ki; i < 2 && n->elems[i+1]; i++) |
985 | n->elems[i] = n->elems[i+1]; |
986 | n->elems[i] = NULL; |
987 | |
988 | /* |
989 | * It's just possible that we have reduced the leaf to zero |
990 | * size. This can only happen if it was the root - so destroy |
991 | * it and make the tree empty. |
992 | */ |
993 | if (!n->elems[0]) { |
994 | LOG((" removed last element in tree, destroying empty root\n")); |
995 | assert(n == t->root); |
996 | sfree(n); |
997 | t->root = NULL; |
998 | } |
999 | |
1000 | return retval; /* finished! */ |
1001 | } |
1002 | void *delpos234(tree234 *t, int index) { |
1003 | if (index < 0 || index >= countnode234(t->root)) |
1004 | return NULL; |
1005 | return delpos234_internal(t, index); |
1006 | } |
1007 | void *del234(tree234 *t, void *e) { |
1008 | int index; |
1009 | if (!findrelpos234(t, e, NULL, REL234_EQ, &index)) |
1010 | return NULL; /* it wasn't in there anyway */ |
1011 | return delpos234_internal(t, index); /* it's there; delete it. */ |
1012 | } |
1013 | |
1014 | /* |
1015 | * Join two subtrees together with a separator element between |
1016 | * them, given their relative height. |
1017 | * |
1018 | * (Height<0 means the left tree is shorter, >0 means the right |
1019 | * tree is shorter, =0 means (duh) they're equal.) |
1020 | * |
1021 | * It is assumed that any checks needed on the ordering criterion |
1022 | * have _already_ been done. |
1023 | * |
1024 | * The value returned in `height' is 0 or 1 depending on whether the |
1025 | * resulting tree is the same height as the original larger one, or |
1026 | * one higher. |
1027 | */ |
1028 | static node234 *join234_internal(node234 *left, void *sep, |
1029 | node234 *right, int *height) { |
1030 | node234 *root, *node; |
1031 | int relht = *height; |
1032 | int ki; |
1033 | |
1034 | LOG((" join: joining %p \"%s\" %p, relative height is %d\n", |
1035 | left, sep, right, relht)); |
1036 | if (relht == 0) { |
1037 | /* |
1038 | * The trees are the same height. Create a new one-element |
1039 | * root containing the separator and pointers to the two |
1040 | * nodes. |
1041 | */ |
1042 | node234 *newroot; |
24055572 |
1043 | newroot = snew(node234); |
720a8fb7 |
1044 | newroot->kids[0] = left; newroot->counts[0] = countnode234(left); |
1045 | newroot->elems[0] = sep; |
1046 | newroot->kids[1] = right; newroot->counts[1] = countnode234(right); |
1047 | newroot->elems[1] = NULL; |
1048 | newroot->kids[2] = NULL; newroot->counts[2] = 0; |
1049 | newroot->elems[2] = NULL; |
1050 | newroot->kids[3] = NULL; newroot->counts[3] = 0; |
1051 | newroot->parent = NULL; |
1052 | if (left) left->parent = newroot; |
1053 | if (right) right->parent = newroot; |
1054 | *height = 1; |
1055 | LOG((" join: same height, brand new root\n")); |
1056 | return newroot; |
1057 | } |
1058 | |
1059 | /* |
1060 | * This now works like the addition algorithm on the larger |
1061 | * tree. We're replacing a single kid pointer with two kid |
1062 | * pointers separated by an element; if that causes the node to |
1063 | * overload, we split it in two, move a separator element up to |
1064 | * the next node, and repeat. |
1065 | */ |
1066 | if (relht < 0) { |
1067 | /* |
1068 | * Left tree is shorter. Search down the right tree to find |
1069 | * the pointer we're inserting at. |
1070 | */ |
1071 | node = root = right; |
1072 | while (++relht < 0) { |
1073 | node = node->kids[0]; |
1074 | } |
1075 | ki = 0; |
1076 | right = node->kids[ki]; |
1077 | } else { |
1078 | /* |
1079 | * Right tree is shorter; search down the left to find the |
1080 | * pointer we're inserting at. |
1081 | */ |
1082 | node = root = left; |
1083 | while (--relht > 0) { |
1084 | if (node->elems[2]) |
1085 | node = node->kids[3]; |
1086 | else if (node->elems[1]) |
1087 | node = node->kids[2]; |
1088 | else |
1089 | node = node->kids[1]; |
1090 | } |
1091 | if (node->elems[2]) |
1092 | ki = 3; |
1093 | else if (node->elems[1]) |
1094 | ki = 2; |
1095 | else |
1096 | ki = 1; |
1097 | left = node->kids[ki]; |
1098 | } |
1099 | |
1100 | /* |
1101 | * Now proceed as for addition. |
1102 | */ |
1103 | *height = add234_insert(left, sep, right, &root, node, ki); |
1104 | |
1105 | return root; |
1106 | } |
1107 | static int height234(tree234 *t) { |
1108 | int level = 0; |
1109 | node234 *n = t->root; |
1110 | while (n) { |
1111 | level++; |
1112 | n = n->kids[0]; |
1113 | } |
1114 | return level; |
1115 | } |
1116 | tree234 *join234(tree234 *t1, tree234 *t2) { |
1117 | int size2 = countnode234(t2->root); |
1118 | if (size2 > 0) { |
1119 | void *element; |
1120 | int relht; |
1121 | |
1122 | if (t1->cmp) { |
1123 | element = index234(t2, 0); |
1124 | element = findrelpos234(t1, element, NULL, REL234_GE, NULL); |
1125 | if (element) |
1126 | return NULL; |
1127 | } |
1128 | |
1129 | element = delpos234(t2, 0); |
1130 | relht = height234(t1) - height234(t2); |
1131 | t1->root = join234_internal(t1->root, element, t2->root, &relht); |
1132 | t2->root = NULL; |
1133 | } |
1134 | return t1; |
1135 | } |
1136 | tree234 *join234r(tree234 *t1, tree234 *t2) { |
1137 | int size1 = countnode234(t1->root); |
1138 | if (size1 > 0) { |
1139 | void *element; |
1140 | int relht; |
1141 | |
1142 | if (t2->cmp) { |
1143 | element = index234(t1, size1-1); |
1144 | element = findrelpos234(t2, element, NULL, REL234_LE, NULL); |
1145 | if (element) |
1146 | return NULL; |
1147 | } |
1148 | |
1149 | element = delpos234(t1, size1-1); |
1150 | relht = height234(t1) - height234(t2); |
1151 | t2->root = join234_internal(t1->root, element, t2->root, &relht); |
1152 | t1->root = NULL; |
1153 | } |
1154 | return t2; |
1155 | } |
1156 | |
1157 | /* |
1158 | * Split out the first <index> elements in a tree and return a |
1159 | * pointer to the root node. Leave the root node of the remainder |
1160 | * in t. |
1161 | */ |
1162 | static node234 *split234_internal(tree234 *t, int index) { |
1163 | node234 *halves[2], *n, *sib, *sub; |
1164 | node234 *lparent, *rparent; |
1165 | int ki, pki, i, half, lcount, rcount; |
1166 | |
1167 | n = t->root; |
1168 | LOG(("splitting tree %p at point %d\n", t, index)); |
1169 | |
1170 | /* |
1171 | * Easy special cases. After this we have also dealt completely |
1172 | * with the empty-tree case and we can assume the root exists. |
1173 | */ |
1174 | if (index == 0) /* return nothing */ |
1175 | return NULL; |
1176 | if (index == countnode234(t->root)) { /* return the whole tree */ |
1177 | node234 *ret = t->root; |
1178 | t->root = NULL; |
1179 | return ret; |
1180 | } |
1181 | |
1182 | /* |
1183 | * Search down the tree to find the split point. |
1184 | */ |
b1bd5731 |
1185 | halves[0] = halves[1] = NULL; |
720a8fb7 |
1186 | lparent = rparent = NULL; |
1187 | pki = -1; |
1188 | while (n) { |
1189 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", |
1190 | n, |
1191 | n->kids[0], n->counts[0], n->elems[0], |
1192 | n->kids[1], n->counts[1], n->elems[1], |
1193 | n->kids[2], n->counts[2], n->elems[2], |
1194 | n->kids[3], n->counts[3], |
1195 | index)); |
1196 | lcount = index; |
1197 | rcount = countnode234(n) - lcount; |
1198 | if (index <= n->counts[0]) { |
1199 | ki = 0; |
1200 | } else if (index -= n->counts[0]+1, index <= n->counts[1]) { |
1201 | ki = 1; |
1202 | } else if (index -= n->counts[1]+1, index <= n->counts[2]) { |
1203 | ki = 2; |
1204 | } else { |
1205 | index -= n->counts[2]+1; |
1206 | ki = 3; |
1207 | } |
1208 | |
1209 | LOG((" splitting at subtree %d\n", ki)); |
1210 | sub = n->kids[ki]; |
1211 | |
1212 | LOG((" splitting at child index %d\n", ki)); |
1213 | |
1214 | /* |
1215 | * Split the node, put halves[0] on the right of the left |
1216 | * one and halves[1] on the left of the right one, put the |
1217 | * new node pointers in halves[0] and halves[1], and go up |
1218 | * a level. |
1219 | */ |
24055572 |
1220 | sib = snew(node234); |
720a8fb7 |
1221 | for (i = 0; i < 3; i++) { |
1222 | if (i+ki < 3 && n->elems[i+ki]) { |
1223 | sib->elems[i] = n->elems[i+ki]; |
1224 | sib->kids[i+1] = n->kids[i+ki+1]; |
1225 | if (sib->kids[i+1]) sib->kids[i+1]->parent = sib; |
1226 | sib->counts[i+1] = n->counts[i+ki+1]; |
1227 | n->elems[i+ki] = NULL; |
1228 | n->kids[i+ki+1] = NULL; |
1229 | n->counts[i+ki+1] = 0; |
1230 | } else { |
1231 | sib->elems[i] = NULL; |
1232 | sib->kids[i+1] = NULL; |
1233 | sib->counts[i+1] = 0; |
1234 | } |
1235 | } |
1236 | if (lparent) { |
1237 | lparent->kids[pki] = n; |
1238 | lparent->counts[pki] = lcount; |
1239 | n->parent = lparent; |
1240 | rparent->kids[0] = sib; |
1241 | rparent->counts[0] = rcount; |
1242 | sib->parent = rparent; |
1243 | } else { |
1244 | halves[0] = n; |
1245 | n->parent = NULL; |
1246 | halves[1] = sib; |
1247 | sib->parent = NULL; |
1248 | } |
1249 | lparent = n; |
1250 | rparent = sib; |
1251 | pki = ki; |
1252 | LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
1253 | n, |
1254 | n->kids[0], n->counts[0], n->elems[0], |
1255 | n->kids[1], n->counts[1], n->elems[1], |
1256 | n->kids[2], n->counts[2], n->elems[2], |
1257 | n->kids[3], n->counts[3])); |
1258 | LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
1259 | sib, |
1260 | sib->kids[0], sib->counts[0], sib->elems[0], |
1261 | sib->kids[1], sib->counts[1], sib->elems[1], |
1262 | sib->kids[2], sib->counts[2], sib->elems[2], |
1263 | sib->kids[3], sib->counts[3])); |
1264 | |
1265 | n = sub; |
1266 | } |
1267 | |
1268 | /* |
1269 | * We've come off the bottom here, so we've successfully split |
1270 | * the tree into two equally high subtrees. The only problem is |
1271 | * that some of the nodes down the fault line will be smaller |
1272 | * than the minimum permitted size. (Since this is a 2-3-4 |
1273 | * tree, that means they'll be zero-element one-child nodes.) |
1274 | */ |
1275 | LOG((" fell off bottom, lroot is %p, rroot is %p\n", |
1276 | halves[0], halves[1])); |
b1bd5731 |
1277 | assert(halves[0] != NULL); |
1278 | assert(halves[1] != NULL); |
720a8fb7 |
1279 | lparent->counts[pki] = rparent->counts[0] = 0; |
1280 | lparent->kids[pki] = rparent->kids[0] = NULL; |
1281 | |
1282 | /* |
1283 | * So now we go back down the tree from each of the two roots, |
1284 | * fixing up undersize nodes. |
1285 | */ |
1286 | for (half = 0; half < 2; half++) { |
1287 | /* |
1288 | * Remove the root if it's undersize (it will contain only |
1289 | * one child pointer, so just throw it away and replace it |
1290 | * with its child). This might happen several times. |
1291 | */ |
1292 | while (halves[half] && !halves[half]->elems[0]) { |
1293 | LOG((" root %p is undersize, throwing away\n", halves[half])); |
1294 | halves[half] = halves[half]->kids[0]; |
1295 | sfree(halves[half]->parent); |
1296 | halves[half]->parent = NULL; |
1297 | LOG((" new root is %p\n", halves[half])); |
1298 | } |
1299 | |
1300 | n = halves[half]; |
1301 | while (n) { |
1302 | void (*toward)(node234 *n, int ki, int *k, int *index); |
1303 | int ni, merge; |
1304 | |
1305 | /* |
1306 | * Now we have a potentially undersize node on the |
1307 | * right (if half==0) or left (if half==1). Sort it |
1308 | * out, by merging with a neighbour or by transferring |
1309 | * subtrees over. At this time we must also ensure that |
1310 | * nodes are bigger than minimum, in case we need an |
1311 | * element to merge two nodes below. |
1312 | */ |
1313 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
1314 | n, |
1315 | n->kids[0], n->counts[0], n->elems[0], |
1316 | n->kids[1], n->counts[1], n->elems[1], |
1317 | n->kids[2], n->counts[2], n->elems[2], |
1318 | n->kids[3], n->counts[3])); |
1319 | if (half == 1) { |
1320 | ki = 0; /* the kid we're interested in */ |
1321 | ni = 1; /* the neighbour */ |
1322 | merge = 0; /* for merge: leftmost of the two */ |
1323 | toward = trans234_subtree_left; |
1324 | } else { |
1325 | ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1); |
1326 | ni = ki-1; |
1327 | merge = ni; |
1328 | toward = trans234_subtree_right; |
1329 | } |
1330 | |
1331 | sub = n->kids[ki]; |
1332 | if (sub && !sub->elems[1]) { |
1333 | /* |
1334 | * This node is undersized or minimum-size. If we |
1335 | * can merge it with its neighbour, we do so; |
1336 | * otherwise we must be able to transfer subtrees |
1337 | * over to it until it is greater than minimum |
1338 | * size. |
1339 | */ |
1340 | int undersized = (!sub->elems[0]); |
1341 | LOG((" child %d is %ssize\n", ki, |
1342 | undersized ? "under" : "minimum-")); |
1343 | LOG((" neighbour is %s\n", |
1344 | n->kids[ni]->elems[2] ? "large" : |
1345 | n->kids[ni]->elems[1] ? "medium" : "small")); |
1346 | if (!n->kids[ni]->elems[1] || |
1347 | (undersized && !n->kids[ni]->elems[2])) { |
1348 | /* |
1349 | * Neighbour is small, or possibly neighbour is |
1350 | * medium and we are undersize. |
1351 | */ |
1352 | trans234_subtree_merge(n, merge, NULL, NULL); |
1353 | sub = n->kids[merge]; |
1354 | if (!n->elems[0]) { |
1355 | /* |
1356 | * n is empty, and hence must have been the |
1357 | * root and needs to be removed. |
1358 | */ |
1359 | assert(!n->parent); |
1360 | LOG((" shifting root!\n")); |
1361 | halves[half] = sub; |
1362 | halves[half]->parent = NULL; |
1363 | sfree(n); |
1364 | } |
1365 | } else { |
1366 | /* Neighbour is big enough to move trees over. */ |
1367 | toward(n, ni, NULL, NULL); |
1368 | if (undersized) |
1369 | toward(n, ni, NULL, NULL); |
1370 | } |
1371 | } |
1372 | n = sub; |
1373 | } |
1374 | } |
1375 | |
1376 | t->root = halves[1]; |
1377 | return halves[0]; |
1378 | } |
1379 | tree234 *splitpos234(tree234 *t, int index, int before) { |
1380 | tree234 *ret; |
1381 | node234 *n; |
1382 | int count; |
1383 | |
1384 | count = countnode234(t->root); |
1385 | if (index < 0 || index > count) |
1386 | return NULL; /* error */ |
1387 | ret = newtree234(t->cmp); |
1388 | n = split234_internal(t, index); |
1389 | if (before) { |
1390 | /* We want to return the ones before the index. */ |
1391 | ret->root = n; |
1392 | } else { |
1393 | /* |
1394 | * We want to keep the ones before the index and return the |
1395 | * ones after. |
1396 | */ |
1397 | ret->root = t->root; |
1398 | t->root = n; |
1399 | } |
1400 | return ret; |
1401 | } |
1402 | tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) { |
1403 | int before; |
1404 | int index; |
1405 | |
1406 | assert(rel != REL234_EQ); |
1407 | |
1408 | if (rel == REL234_GT || rel == REL234_GE) { |
1409 | before = 1; |
1410 | rel = (rel == REL234_GT ? REL234_LE : REL234_LT); |
1411 | } else { |
1412 | before = 0; |
1413 | } |
1414 | if (!findrelpos234(t, e, cmp, rel, &index)) |
1415 | index = 0; |
1416 | |
1417 | return splitpos234(t, index+1, before); |
1418 | } |
1419 | |
1420 | static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) { |
1421 | int i; |
24055572 |
1422 | node234 *n2 = snew(node234); |
720a8fb7 |
1423 | |
1424 | for (i = 0; i < 3; i++) { |
1425 | if (n->elems[i] && copyfn) |
1426 | n2->elems[i] = copyfn(copyfnstate, n->elems[i]); |
1427 | else |
1428 | n2->elems[i] = n->elems[i]; |
1429 | } |
1430 | |
1431 | for (i = 0; i < 4; i++) { |
1432 | if (n->kids[i]) { |
1433 | n2->kids[i] = copynode234(n->kids[i], copyfn, copyfnstate); |
1434 | n2->kids[i]->parent = n2; |
1435 | } else { |
1436 | n2->kids[i] = NULL; |
1437 | } |
1438 | n2->counts[i] = n->counts[i]; |
1439 | } |
1440 | |
1441 | return n2; |
1442 | } |
1443 | tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) { |
1444 | tree234 *t2; |
1445 | |
1446 | t2 = newtree234(t->cmp); |
7f82318f |
1447 | if (t->root) { |
1448 | t2->root = copynode234(t->root, copyfn, copyfnstate); |
1449 | t2->root->parent = NULL; |
1450 | } else |
1451 | t2->root = NULL; |
720a8fb7 |
1452 | |
1453 | return t2; |
1454 | } |
1455 | |
1456 | #ifdef TEST |
1457 | |
1458 | /* |
1459 | * Test code for the 2-3-4 tree. This code maintains an alternative |
1460 | * representation of the data in the tree, in an array (using the |
1461 | * obvious and slow insert and delete functions). After each tree |
1462 | * operation, the verify() function is called, which ensures all |
1463 | * the tree properties are preserved: |
1464 | * - node->child->parent always equals node |
1465 | * - tree->root->parent always equals NULL |
1466 | * - number of kids == 0 or number of elements + 1; |
1467 | * - tree has the same depth everywhere |
1468 | * - every node has at least one element |
1469 | * - subtree element counts are accurate |
1470 | * - any NULL kid pointer is accompanied by a zero count |
1471 | * - in a sorted tree: ordering property between elements of a |
1472 | * node and elements of its children is preserved |
1473 | * and also ensures the list represented by the tree is the same |
1474 | * list it should be. (This last check also doubly verifies the |
1475 | * ordering properties, because the `same list it should be' is by |
1476 | * definition correctly ordered. It also ensures all nodes are |
1477 | * distinct, because the enum functions would get caught in a loop |
1478 | * if not.) |
1479 | */ |
1480 | |
b1bd5731 |
1481 | #include <string.h> |
720a8fb7 |
1482 | #include <stdarg.h> |
1483 | |
1484 | #define srealloc realloc |
1485 | |
1486 | /* |
1487 | * Error reporting function. |
1488 | */ |
1489 | void error(char *fmt, ...) { |
1490 | va_list ap; |
1491 | printf("ERROR: "); |
1492 | va_start(ap, fmt); |
1493 | vfprintf(stdout, fmt, ap); |
1494 | va_end(ap); |
1495 | printf("\n"); |
1496 | } |
1497 | |
1498 | /* The array representation of the data. */ |
1499 | void **array; |
1500 | int arraylen, arraysize; |
1501 | cmpfn234 cmp; |
1502 | |
1503 | /* The tree representation of the same data. */ |
1504 | tree234 *tree; |
1505 | |
1506 | /* |
1507 | * Routines to provide a diagnostic printout of a tree. Currently |
1508 | * relies on every element in the tree being a one-character string |
1509 | * :-) |
1510 | */ |
1511 | typedef struct { |
1512 | char **levels; |
1513 | } dispctx; |
1514 | |
1515 | int dispnode(node234 *n, int level, dispctx *ctx) { |
1516 | if (level == 0) { |
1517 | int xpos = strlen(ctx->levels[0]); |
1518 | int len; |
1519 | |
1520 | if (n->elems[2]) |
1521 | len = sprintf(ctx->levels[0]+xpos, " %s%s%s", |
1522 | n->elems[0], n->elems[1], n->elems[2]); |
1523 | else if (n->elems[1]) |
1524 | len = sprintf(ctx->levels[0]+xpos, " %s%s", |
1525 | n->elems[0], n->elems[1]); |
1526 | else |
1527 | len = sprintf(ctx->levels[0]+xpos, " %s", |
1528 | n->elems[0]); |
1529 | return xpos + 1 + (len-1) / 2; |
1530 | } else { |
1531 | int xpos[4], nkids; |
1532 | int nodelen, mypos, myleft, x, i; |
1533 | |
1534 | xpos[0] = dispnode(n->kids[0], level-3, ctx); |
1535 | xpos[1] = dispnode(n->kids[1], level-3, ctx); |
1536 | nkids = 2; |
1537 | if (n->kids[2]) { |
1538 | xpos[2] = dispnode(n->kids[2], level-3, ctx); |
1539 | nkids = 3; |
1540 | } |
1541 | if (n->kids[3]) { |
1542 | xpos[3] = dispnode(n->kids[3], level-3, ctx); |
1543 | nkids = 4; |
1544 | } |
1545 | |
1546 | if (nkids == 4) |
1547 | mypos = (xpos[1] + xpos[2]) / 2; |
1548 | else if (nkids == 3) |
1549 | mypos = xpos[1]; |
1550 | else |
1551 | mypos = (xpos[0] + xpos[1]) / 2; |
1552 | nodelen = nkids * 2 - 1; |
1553 | myleft = mypos - ((nodelen-1)/2); |
1554 | assert(myleft >= xpos[0]); |
1555 | assert(myleft + nodelen-1 <= xpos[nkids-1]); |
1556 | |
1557 | x = strlen(ctx->levels[level]); |
1558 | while (x <= xpos[0] && x < myleft) |
1559 | ctx->levels[level][x++] = ' '; |
1560 | while (x < myleft) |
1561 | ctx->levels[level][x++] = '_'; |
1562 | if (nkids==4) |
1563 | x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.", |
1564 | n->elems[0], n->elems[1], n->elems[2]); |
1565 | else if (nkids==3) |
1566 | x += sprintf(ctx->levels[level]+x, ".%s.%s.", |
1567 | n->elems[0], n->elems[1]); |
1568 | else |
1569 | x += sprintf(ctx->levels[level]+x, ".%s.", |
1570 | n->elems[0]); |
1571 | while (x < xpos[nkids-1]) |
1572 | ctx->levels[level][x++] = '_'; |
1573 | ctx->levels[level][x] = '\0'; |
1574 | |
1575 | x = strlen(ctx->levels[level-1]); |
1576 | for (i = 0; i < nkids; i++) { |
1577 | int rpos, pos; |
1578 | rpos = xpos[i]; |
1579 | if (i > 0 && i < nkids-1) |
1580 | pos = myleft + 2*i; |
1581 | else |
1582 | pos = rpos; |
1583 | if (rpos < pos) |
1584 | rpos++; |
1585 | while (x < pos && x < rpos) |
1586 | ctx->levels[level-1][x++] = ' '; |
1587 | if (x == pos) |
1588 | ctx->levels[level-1][x++] = '|'; |
1589 | while (x < pos || x < rpos) |
1590 | ctx->levels[level-1][x++] = '_'; |
1591 | if (x == pos) |
1592 | ctx->levels[level-1][x++] = '|'; |
1593 | } |
1594 | ctx->levels[level-1][x] = '\0'; |
1595 | |
1596 | x = strlen(ctx->levels[level-2]); |
1597 | for (i = 0; i < nkids; i++) { |
1598 | int rpos = xpos[i]; |
1599 | |
1600 | while (x < rpos) |
1601 | ctx->levels[level-2][x++] = ' '; |
1602 | ctx->levels[level-2][x++] = '|'; |
1603 | } |
1604 | ctx->levels[level-2][x] = '\0'; |
1605 | |
1606 | return mypos; |
1607 | } |
1608 | } |
1609 | |
1610 | void disptree(tree234 *t) { |
1611 | dispctx ctx; |
1612 | char *leveldata; |
1613 | int width = count234(t); |
1614 | int ht = height234(t) * 3 - 2; |
1615 | int i; |
1616 | |
1617 | if (!t->root) { |
1618 | printf("[empty tree]\n"); |
1619 | } |
1620 | |
1621 | leveldata = smalloc(ht * (width+2)); |
1622 | ctx.levels = smalloc(ht * sizeof(char *)); |
1623 | for (i = 0; i < ht; i++) { |
1624 | ctx.levels[i] = leveldata + i * (width+2); |
1625 | ctx.levels[i][0] = '\0'; |
1626 | } |
1627 | |
1628 | (void) dispnode(t->root, ht-1, &ctx); |
1629 | |
1630 | for (i = ht; i-- ;) |
1631 | printf("%s\n", ctx.levels[i]); |
1632 | |
1633 | sfree(ctx.levels); |
1634 | sfree(leveldata); |
1635 | } |
1636 | |
1637 | typedef struct { |
1638 | int treedepth; |
1639 | int elemcount; |
1640 | } chkctx; |
1641 | |
1642 | int chknode(chkctx *ctx, int level, node234 *node, |
1643 | void *lowbound, void *highbound) { |
1644 | int nkids, nelems; |
1645 | int i; |
1646 | int count; |
1647 | |
1648 | /* Count the non-NULL kids. */ |
1649 | for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++); |
1650 | /* Ensure no kids beyond the first NULL are non-NULL. */ |
1651 | for (i = nkids; i < 4; i++) |
1652 | if (node->kids[i]) { |
1653 | error("node %p: nkids=%d but kids[%d] non-NULL", |
1654 | node, nkids, i); |
1655 | } else if (node->counts[i]) { |
1656 | error("node %p: kids[%d] NULL but count[%d]=%d nonzero", |
1657 | node, i, i, node->counts[i]); |
1658 | } |
1659 | |
1660 | /* Count the non-NULL elements. */ |
1661 | for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++); |
1662 | /* Ensure no elements beyond the first NULL are non-NULL. */ |
1663 | for (i = nelems; i < 3; i++) |
1664 | if (node->elems[i]) { |
1665 | error("node %p: nelems=%d but elems[%d] non-NULL", |
1666 | node, nelems, i); |
1667 | } |
1668 | |
1669 | if (nkids == 0) { |
1670 | /* |
1671 | * If nkids==0, this is a leaf node; verify that the tree |
1672 | * depth is the same everywhere. |
1673 | */ |
1674 | if (ctx->treedepth < 0) |
1675 | ctx->treedepth = level; /* we didn't know the depth yet */ |
1676 | else if (ctx->treedepth != level) |
1677 | error("node %p: leaf at depth %d, previously seen depth %d", |
1678 | node, level, ctx->treedepth); |
1679 | } else { |
1680 | /* |
1681 | * If nkids != 0, then it should be nelems+1, unless nelems |
1682 | * is 0 in which case nkids should also be 0 (and so we |
1683 | * shouldn't be in this condition at all). |
1684 | */ |
1685 | int shouldkids = (nelems ? nelems+1 : 0); |
1686 | if (nkids != shouldkids) { |
1687 | error("node %p: %d elems should mean %d kids but has %d", |
1688 | node, nelems, shouldkids, nkids); |
1689 | } |
1690 | } |
1691 | |
1692 | /* |
1693 | * nelems should be at least 1. |
1694 | */ |
1695 | if (nelems == 0) { |
1696 | error("node %p: no elems", node, nkids); |
1697 | } |
1698 | |
1699 | /* |
1700 | * Add nelems to the running element count of the whole tree. |
1701 | */ |
1702 | ctx->elemcount += nelems; |
1703 | |
1704 | /* |
1705 | * Check ordering property: all elements should be strictly > |
1706 | * lowbound, strictly < highbound, and strictly < each other in |
1707 | * sequence. (lowbound and highbound are NULL at edges of tree |
1708 | * - both NULL at root node - and NULL is considered to be < |
1709 | * everything and > everything. IYSWIM.) |
1710 | */ |
1711 | if (cmp) { |
1712 | for (i = -1; i < nelems; i++) { |
1713 | void *lower = (i == -1 ? lowbound : node->elems[i]); |
1714 | void *higher = (i+1 == nelems ? highbound : node->elems[i+1]); |
1715 | if (lower && higher && cmp(lower, higher) >= 0) { |
1716 | error("node %p: kid comparison [%d=%s,%d=%s] failed", |
1717 | node, i, lower, i+1, higher); |
1718 | } |
1719 | } |
1720 | } |
1721 | |
1722 | /* |
1723 | * Check parent pointers: all non-NULL kids should have a |
1724 | * parent pointer coming back to this node. |
1725 | */ |
1726 | for (i = 0; i < nkids; i++) |
1727 | if (node->kids[i]->parent != node) { |
1728 | error("node %p kid %d: parent ptr is %p not %p", |
1729 | node, i, node->kids[i]->parent, node); |
1730 | } |
1731 | |
1732 | |
1733 | /* |
1734 | * Now (finally!) recurse into subtrees. |
1735 | */ |
1736 | count = nelems; |
1737 | |
1738 | for (i = 0; i < nkids; i++) { |
1739 | void *lower = (i == 0 ? lowbound : node->elems[i-1]); |
1740 | void *higher = (i >= nelems ? highbound : node->elems[i]); |
1741 | int subcount = chknode(ctx, level+1, node->kids[i], lower, higher); |
1742 | if (node->counts[i] != subcount) { |
1743 | error("node %p kid %d: count says %d, subtree really has %d", |
1744 | node, i, node->counts[i], subcount); |
1745 | } |
1746 | count += subcount; |
1747 | } |
1748 | |
1749 | return count; |
1750 | } |
1751 | |
1752 | void verifytree(tree234 *tree, void **array, int arraylen) { |
1753 | chkctx ctx; |
1754 | int i; |
1755 | void *p; |
1756 | |
1757 | ctx.treedepth = -1; /* depth unknown yet */ |
1758 | ctx.elemcount = 0; /* no elements seen yet */ |
1759 | /* |
1760 | * Verify validity of tree properties. |
1761 | */ |
1762 | if (tree->root) { |
1763 | if (tree->root->parent != NULL) |
1764 | error("root->parent is %p should be null", tree->root->parent); |
1765 | chknode(&ctx, 0, tree->root, NULL, NULL); |
1766 | } |
1767 | printf("tree depth: %d\n", ctx.treedepth); |
1768 | /* |
1769 | * Enumerate the tree and ensure it matches up to the array. |
1770 | */ |
1771 | for (i = 0; NULL != (p = index234(tree, i)); i++) { |
1772 | if (i >= arraylen) |
1773 | error("tree contains more than %d elements", arraylen); |
1774 | if (array[i] != p) |
1775 | error("enum at position %d: array says %s, tree says %s", |
1776 | i, array[i], p); |
1777 | } |
1778 | if (ctx.elemcount != i) { |
1779 | error("tree really contains %d elements, enum gave %d", |
1780 | ctx.elemcount, i); |
1781 | } |
1782 | if (i < arraylen) { |
1783 | error("enum gave only %d elements, array has %d", i, arraylen); |
1784 | } |
1785 | i = count234(tree); |
1786 | if (ctx.elemcount != i) { |
1787 | error("tree really contains %d elements, count234 gave %d", |
1788 | ctx.elemcount, i); |
1789 | } |
1790 | } |
1791 | void verify(void) { verifytree(tree, array, arraylen); } |
1792 | |
1793 | void internal_addtest(void *elem, int index, void *realret) { |
1794 | int i, j; |
1795 | void *retval; |
1796 | |
1797 | if (arraysize < arraylen+1) { |
1798 | arraysize = arraylen+1+256; |
1799 | array = (array == NULL ? smalloc(arraysize*sizeof(*array)) : |
1800 | srealloc(array, arraysize*sizeof(*array))); |
1801 | } |
1802 | |
1803 | i = index; |
1804 | /* now i points to the first element >= elem */ |
1805 | retval = elem; /* expect elem returned (success) */ |
1806 | for (j = arraylen; j > i; j--) |
1807 | array[j] = array[j-1]; |
1808 | array[i] = elem; /* add elem to array */ |
1809 | arraylen++; |
1810 | |
1811 | if (realret != retval) { |
1812 | error("add: retval was %p expected %p", realret, retval); |
1813 | } |
1814 | |
1815 | verify(); |
1816 | } |
1817 | |
1818 | void addtest(void *elem) { |
1819 | int i; |
1820 | void *realret; |
1821 | |
1822 | realret = add234(tree, elem); |
1823 | |
1824 | i = 0; |
1825 | while (i < arraylen && cmp(elem, array[i]) > 0) |
1826 | i++; |
1827 | if (i < arraylen && !cmp(elem, array[i])) { |
1828 | void *retval = array[i]; /* expect that returned not elem */ |
1829 | if (realret != retval) { |
1830 | error("add: retval was %p expected %p", realret, retval); |
1831 | } |
1832 | } else |
1833 | internal_addtest(elem, i, realret); |
1834 | } |
1835 | |
1836 | void addpostest(void *elem, int i) { |
1837 | void *realret; |
1838 | |
1839 | realret = addpos234(tree, elem, i); |
1840 | |
1841 | internal_addtest(elem, i, realret); |
1842 | } |
1843 | |
1844 | void delpostest(int i) { |
1845 | int index = i; |
1846 | void *elem = array[i], *ret; |
1847 | |
1848 | /* i points to the right element */ |
1849 | while (i < arraylen-1) { |
1850 | array[i] = array[i+1]; |
1851 | i++; |
1852 | } |
1853 | arraylen--; /* delete elem from array */ |
1854 | |
1855 | if (tree->cmp) |
1856 | ret = del234(tree, elem); |
1857 | else |
1858 | ret = delpos234(tree, index); |
1859 | |
1860 | if (ret != elem) { |
1861 | error("del returned %p, expected %p", ret, elem); |
1862 | } |
1863 | |
1864 | verify(); |
1865 | } |
1866 | |
1867 | void deltest(void *elem) { |
1868 | int i; |
1869 | |
1870 | i = 0; |
1871 | while (i < arraylen && cmp(elem, array[i]) > 0) |
1872 | i++; |
1873 | if (i >= arraylen || cmp(elem, array[i]) != 0) |
1874 | return; /* don't do it! */ |
1875 | delpostest(i); |
1876 | } |
1877 | |
1878 | /* A sample data set and test utility. Designed for pseudo-randomness, |
1879 | * and yet repeatability. */ |
1880 | |
1881 | /* |
1882 | * This random number generator uses the `portable implementation' |
1883 | * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits; |
1884 | * change it if not. |
1885 | */ |
1886 | int randomnumber(unsigned *seed) { |
1887 | *seed *= 1103515245; |
1888 | *seed += 12345; |
1889 | return ((*seed) / 65536) % 32768; |
1890 | } |
1891 | |
1892 | int mycmp(void *av, void *bv) { |
1893 | char const *a = (char const *)av; |
1894 | char const *b = (char const *)bv; |
1895 | return strcmp(a, b); |
1896 | } |
1897 | |
720a8fb7 |
1898 | char *strings[] = { |
1899 | "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i", |
1900 | "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E", |
1901 | "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u", |
1902 | "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y", |
1903 | "m", "s", "l", "4", |
1904 | #if 0 |
1905 | "a", "ab", "absque", "coram", "de", |
1906 | "palam", "clam", "cum", "ex", "e", |
1907 | "sine", "tenus", "pro", "prae", |
1908 | "banana", "carrot", "cabbage", "broccoli", "onion", "zebra", |
1909 | "penguin", "blancmange", "pangolin", "whale", "hedgehog", |
1910 | "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux", |
1911 | "murfl", "spoo", "breen", "flarn", "octothorpe", |
1912 | "snail", "tiger", "elephant", "octopus", "warthog", "armadillo", |
1913 | "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin", |
1914 | "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper", |
1915 | "wand", "ring", "amulet" |
1916 | #endif |
1917 | }; |
1918 | |
1919 | #define NSTR lenof(strings) |
1920 | |
1921 | void findtest(void) { |
1922 | static const int rels[] = { |
1923 | REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT |
1924 | }; |
1925 | static const char *const relnames[] = { |
1926 | "EQ", "GE", "LE", "LT", "GT" |
1927 | }; |
1928 | int i, j, rel, index; |
1929 | char *p, *ret, *realret, *realret2; |
1930 | int lo, hi, mid, c; |
1931 | |
1932 | for (i = 0; i < (int)NSTR; i++) { |
1933 | p = strings[i]; |
1934 | for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) { |
1935 | rel = rels[j]; |
1936 | |
1937 | lo = 0; hi = arraylen-1; |
1938 | while (lo <= hi) { |
1939 | mid = (lo + hi) / 2; |
1940 | c = strcmp(p, array[mid]); |
1941 | if (c < 0) |
1942 | hi = mid-1; |
1943 | else if (c > 0) |
1944 | lo = mid+1; |
1945 | else |
1946 | break; |
1947 | } |
1948 | |
1949 | if (c == 0) { |
1950 | if (rel == REL234_LT) |
1951 | ret = (mid > 0 ? array[--mid] : NULL); |
1952 | else if (rel == REL234_GT) |
1953 | ret = (mid < arraylen-1 ? array[++mid] : NULL); |
1954 | else |
1955 | ret = array[mid]; |
1956 | } else { |
1957 | assert(lo == hi+1); |
1958 | if (rel == REL234_LT || rel == REL234_LE) { |
1959 | mid = hi; |
1960 | ret = (hi >= 0 ? array[hi] : NULL); |
1961 | } else if (rel == REL234_GT || rel == REL234_GE) { |
1962 | mid = lo; |
1963 | ret = (lo < arraylen ? array[lo] : NULL); |
1964 | } else |
1965 | ret = NULL; |
1966 | } |
1967 | |
1968 | realret = findrelpos234(tree, p, NULL, rel, &index); |
1969 | if (realret != ret) { |
1970 | error("find(\"%s\",%s) gave %s should be %s", |
1971 | p, relnames[j], realret, ret); |
1972 | } |
1973 | if (realret && index != mid) { |
1974 | error("find(\"%s\",%s) gave %d should be %d", |
1975 | p, relnames[j], index, mid); |
1976 | } |
1977 | if (realret && rel == REL234_EQ) { |
1978 | realret2 = index234(tree, index); |
1979 | if (realret2 != realret) { |
1980 | error("find(\"%s\",%s) gave %s(%d) but %d -> %s", |
1981 | p, relnames[j], realret, index, index, realret2); |
1982 | } |
1983 | } |
1984 | #if 0 |
1985 | printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j], |
1986 | realret, index); |
1987 | #endif |
1988 | } |
1989 | } |
1990 | |
1991 | realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index); |
1992 | if (arraylen && (realret != array[0] || index != 0)) { |
1993 | error("find(NULL,GT) gave %s(%d) should be %s(0)", |
1994 | realret, index, array[0]); |
1995 | } else if (!arraylen && (realret != NULL)) { |
1996 | error("find(NULL,GT) gave %s(%d) should be NULL", |
1997 | realret, index); |
1998 | } |
1999 | |
2000 | realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index); |
2001 | if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) { |
2002 | error("find(NULL,LT) gave %s(%d) should be %s(0)", |
2003 | realret, index, array[arraylen-1]); |
2004 | } else if (!arraylen && (realret != NULL)) { |
2005 | error("find(NULL,LT) gave %s(%d) should be NULL", |
2006 | realret, index); |
2007 | } |
2008 | } |
2009 | |
2010 | void splittest(tree234 *tree, void **array, int arraylen) { |
2011 | int i; |
2012 | tree234 *tree3, *tree4; |
2013 | for (i = 0; i <= arraylen; i++) { |
2014 | tree3 = copytree234(tree, NULL, NULL); |
2015 | tree4 = splitpos234(tree3, i, 0); |
2016 | verifytree(tree3, array, i); |
2017 | verifytree(tree4, array+i, arraylen-i); |
2018 | join234(tree3, tree4); |
2019 | freetree234(tree4); /* left empty by join */ |
2020 | verifytree(tree3, array, arraylen); |
2021 | freetree234(tree3); |
2022 | } |
2023 | } |
2024 | |
2025 | int main(void) { |
2026 | int in[NSTR]; |
2027 | int i, j, k; |
2028 | int tworoot, tmplen; |
2029 | unsigned seed = 0; |
2030 | tree234 *tree2, *tree3, *tree4; |
2031 | int c; |
2032 | |
2033 | setvbuf(stdout, NULL, _IOLBF, 0); |
2034 | |
2035 | for (i = 0; i < (int)NSTR; i++) in[i] = 0; |
2036 | array = NULL; |
2037 | arraylen = arraysize = 0; |
2038 | tree = newtree234(mycmp); |
2039 | cmp = mycmp; |
2040 | |
2041 | verify(); |
2042 | for (i = 0; i < 10000; i++) { |
2043 | j = randomnumber(&seed); |
2044 | j %= NSTR; |
2045 | printf("trial: %d\n", i); |
2046 | if (in[j]) { |
2047 | printf("deleting %s (%d)\n", strings[j], j); |
2048 | deltest(strings[j]); |
2049 | in[j] = 0; |
2050 | } else { |
2051 | printf("adding %s (%d)\n", strings[j], j); |
2052 | addtest(strings[j]); |
2053 | in[j] = 1; |
2054 | } |
2055 | disptree(tree); |
2056 | findtest(); |
2057 | } |
2058 | |
2059 | while (arraylen > 0) { |
2060 | j = randomnumber(&seed); |
2061 | j %= arraylen; |
2062 | deltest(array[j]); |
2063 | } |
2064 | |
2065 | freetree234(tree); |
2066 | |
2067 | /* |
2068 | * Now try an unsorted tree. We don't really need to test |
2069 | * delpos234 because we know del234 is based on it, so it's |
2070 | * already been tested in the above sorted-tree code; but for |
2071 | * completeness we'll use it to tear down our unsorted tree |
2072 | * once we've built it. |
2073 | */ |
2074 | tree = newtree234(NULL); |
2075 | cmp = NULL; |
2076 | verify(); |
2077 | for (i = 0; i < 1000; i++) { |
2078 | printf("trial: %d\n", i); |
2079 | j = randomnumber(&seed); |
2080 | j %= NSTR; |
2081 | k = randomnumber(&seed); |
2082 | k %= count234(tree)+1; |
2083 | printf("adding string %s at index %d\n", strings[j], k); |
2084 | addpostest(strings[j], k); |
2085 | } |
2086 | |
2087 | /* |
2088 | * While we have this tree in its full form, we'll take a copy |
2089 | * of it to use in split and join testing. |
2090 | */ |
2091 | tree2 = copytree234(tree, NULL, NULL); |
2092 | verifytree(tree2, array, arraylen);/* check the copy is accurate */ |
2093 | /* |
2094 | * Split tests. Split the tree at every possible point and |
2095 | * check the resulting subtrees. |
2096 | */ |
2097 | tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */ |
2098 | splittest(tree2, array, arraylen); |
2099 | /* |
2100 | * Now do the split test again, but on a tree that has a 2-root |
2101 | * (if the previous one didn't) or doesn't (if the previous one |
2102 | * did). |
2103 | */ |
2104 | tmplen = arraylen; |
2105 | while ((!tree2->root->elems[1]) == tworoot) { |
2106 | delpos234(tree2, --tmplen); |
2107 | } |
2108 | printf("now trying splits on second tree\n"); |
2109 | splittest(tree2, array, tmplen); |
2110 | freetree234(tree2); |
2111 | |
2112 | /* |
2113 | * Back to the main testing of uncounted trees. |
2114 | */ |
2115 | while (count234(tree) > 0) { |
2116 | printf("cleanup: tree size %d\n", count234(tree)); |
2117 | j = randomnumber(&seed); |
2118 | j %= count234(tree); |
2119 | printf("deleting string %s from index %d\n", (char *)array[j], j); |
2120 | delpostest(j); |
2121 | } |
2122 | freetree234(tree); |
2123 | |
2124 | /* |
2125 | * Finally, do some testing on split/join on _sorted_ trees. At |
2126 | * the same time, we'll be testing split on very small trees. |
2127 | */ |
2128 | tree = newtree234(mycmp); |
2129 | cmp = mycmp; |
2130 | arraylen = 0; |
7f82318f |
2131 | for (i = 0; i < 17; i++) { |
720a8fb7 |
2132 | tree2 = copytree234(tree, NULL, NULL); |
2133 | splittest(tree2, array, arraylen); |
2134 | freetree234(tree2); |
7f82318f |
2135 | if (i < 16) |
2136 | addtest(strings[i]); |
720a8fb7 |
2137 | } |
2138 | freetree234(tree); |
2139 | |
2140 | /* |
2141 | * Test silly cases of join: join(emptytree, emptytree), and |
2142 | * also ensure join correctly spots when sorted trees fail the |
2143 | * ordering constraint. |
2144 | */ |
2145 | tree = newtree234(mycmp); |
2146 | tree2 = newtree234(mycmp); |
2147 | tree3 = newtree234(mycmp); |
2148 | tree4 = newtree234(mycmp); |
2149 | assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */ |
2150 | add234(tree2, strings[1]); |
2151 | add234(tree4, strings[0]); |
2152 | array[0] = strings[0]; |
2153 | array[1] = strings[1]; |
2154 | verifytree(tree, array, 0); |
2155 | verifytree(tree2, array+1, 1); |
2156 | verifytree(tree3, array, 0); |
2157 | verifytree(tree4, array, 1); |
2158 | |
2159 | /* |
2160 | * So: |
2161 | * - join(tree,tree3) should leave both tree and tree3 unchanged. |
2162 | * - joinr(tree,tree2) should leave both tree and tree2 unchanged. |
2163 | * - join(tree4,tree3) should leave both tree3 and tree4 unchanged. |
2164 | * - join(tree, tree2) should move the element from tree2 to tree. |
2165 | * - joinr(tree4, tree3) should move the element from tree4 to tree3. |
2166 | * - join(tree,tree3) should return NULL and leave both unchanged. |
2167 | * - join(tree3,tree) should work and create a bigger tree in tree3. |
2168 | */ |
2169 | assert(tree == join234(tree, tree3)); |
2170 | verifytree(tree, array, 0); |
2171 | verifytree(tree3, array, 0); |
2172 | assert(tree2 == join234r(tree, tree2)); |
2173 | verifytree(tree, array, 0); |
2174 | verifytree(tree2, array+1, 1); |
2175 | assert(tree4 == join234(tree4, tree3)); |
2176 | verifytree(tree3, array, 0); |
2177 | verifytree(tree4, array, 1); |
2178 | assert(tree == join234(tree, tree2)); |
2179 | verifytree(tree, array+1, 1); |
2180 | verifytree(tree2, array, 0); |
2181 | assert(tree3 == join234r(tree4, tree3)); |
2182 | verifytree(tree3, array, 1); |
2183 | verifytree(tree4, array, 0); |
2184 | assert(NULL == join234(tree, tree3)); |
2185 | verifytree(tree, array+1, 1); |
2186 | verifytree(tree3, array, 1); |
2187 | assert(tree3 == join234(tree3, tree)); |
2188 | verifytree(tree3, array, 2); |
2189 | verifytree(tree, array, 0); |
2190 | |
2191 | return 0; |
2192 | } |
2193 | |
2194 | #endif |
2195 | |
2196 | #if 0 /* sorted list of strings might be useful */ |
2197 | { |
2198 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", |
2199 | } |
2200 | #endif |