720a8fb7 |
1 | /* |
2 | * tree234.c: reasonably generic counted 2-3-4 tree routines. |
3 | * |
4 | * This file is copyright 1999-2001 Simon Tatham. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person |
7 | * obtaining a copy of this software and associated documentation |
8 | * files (the "Software"), to deal in the Software without |
9 | * restriction, including without limitation the rights to use, |
10 | * copy, modify, merge, publish, distribute, sublicense, and/or |
11 | * sell copies of the Software, and to permit persons to whom the |
12 | * Software is furnished to do so, subject to the following |
13 | * conditions: |
14 | * |
15 | * The above copyright notice and this permission notice shall be |
16 | * included in all copies or substantial portions of the Software. |
17 | * |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR |
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
23 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
25 | * SOFTWARE. |
26 | */ |
27 | |
28 | #include <stdio.h> |
29 | #include <stdlib.h> |
30 | #include <assert.h> |
31 | |
32 | #include "tree234.h" |
33 | |
24055572 |
34 | #include "puzzles.h" /* for smalloc/sfree */ |
720a8fb7 |
35 | |
36 | #ifdef TEST |
37 | #define LOG(x) (printf x) |
38 | #else |
39 | #define LOG(x) |
40 | #endif |
41 | |
42 | typedef struct node234_Tag node234; |
43 | |
44 | struct tree234_Tag { |
45 | node234 *root; |
46 | cmpfn234 cmp; |
47 | }; |
48 | |
49 | struct node234_Tag { |
50 | node234 *parent; |
51 | node234 *kids[4]; |
52 | int counts[4]; |
53 | void *elems[3]; |
54 | }; |
55 | |
56 | /* |
57 | * Create a 2-3-4 tree. |
58 | */ |
59 | tree234 *newtree234(cmpfn234 cmp) { |
24055572 |
60 | tree234 *ret = snew(tree234); |
720a8fb7 |
61 | LOG(("created tree %p\n", ret)); |
62 | ret->root = NULL; |
63 | ret->cmp = cmp; |
64 | return ret; |
65 | } |
66 | |
67 | /* |
68 | * Free a 2-3-4 tree (not including freeing the elements). |
69 | */ |
70 | static void freenode234(node234 *n) { |
71 | if (!n) |
72 | return; |
73 | freenode234(n->kids[0]); |
74 | freenode234(n->kids[1]); |
75 | freenode234(n->kids[2]); |
76 | freenode234(n->kids[3]); |
77 | sfree(n); |
78 | } |
79 | void freetree234(tree234 *t) { |
80 | freenode234(t->root); |
81 | sfree(t); |
82 | } |
83 | |
84 | /* |
85 | * Internal function to count a node. |
86 | */ |
87 | static int countnode234(node234 *n) { |
88 | int count = 0; |
89 | int i; |
90 | if (!n) |
91 | return 0; |
92 | for (i = 0; i < 4; i++) |
93 | count += n->counts[i]; |
94 | for (i = 0; i < 3; i++) |
95 | if (n->elems[i]) |
96 | count++; |
97 | return count; |
98 | } |
99 | |
100 | /* |
101 | * Count the elements in a tree. |
102 | */ |
103 | int count234(tree234 *t) { |
104 | if (t->root) |
105 | return countnode234(t->root); |
106 | else |
107 | return 0; |
108 | } |
109 | |
110 | /* |
111 | * Propagate a node overflow up a tree until it stops. Returns 0 or |
112 | * 1, depending on whether the root had to be split or not. |
113 | */ |
114 | static int add234_insert(node234 *left, void *e, node234 *right, |
115 | node234 **root, node234 *n, int ki) { |
116 | int lcount, rcount; |
117 | /* |
118 | * We need to insert the new left/element/right set in n at |
119 | * child position ki. |
120 | */ |
121 | lcount = countnode234(left); |
122 | rcount = countnode234(right); |
123 | while (n) { |
124 | LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
125 | n, |
126 | n->kids[0], n->counts[0], n->elems[0], |
127 | n->kids[1], n->counts[1], n->elems[1], |
128 | n->kids[2], n->counts[2], n->elems[2], |
129 | n->kids[3], n->counts[3])); |
130 | LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n", |
131 | left, lcount, e, right, rcount, ki)); |
132 | if (n->elems[1] == NULL) { |
133 | /* |
134 | * Insert in a 2-node; simple. |
135 | */ |
136 | if (ki == 0) { |
137 | LOG((" inserting on left of 2-node\n")); |
138 | n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1]; |
139 | n->elems[1] = n->elems[0]; |
140 | n->kids[1] = right; n->counts[1] = rcount; |
141 | n->elems[0] = e; |
142 | n->kids[0] = left; n->counts[0] = lcount; |
143 | } else { /* ki == 1 */ |
144 | LOG((" inserting on right of 2-node\n")); |
145 | n->kids[2] = right; n->counts[2] = rcount; |
146 | n->elems[1] = e; |
147 | n->kids[1] = left; n->counts[1] = lcount; |
148 | } |
149 | if (n->kids[0]) n->kids[0]->parent = n; |
150 | if (n->kids[1]) n->kids[1]->parent = n; |
151 | if (n->kids[2]) n->kids[2]->parent = n; |
152 | LOG((" done\n")); |
153 | break; |
154 | } else if (n->elems[2] == NULL) { |
155 | /* |
156 | * Insert in a 3-node; simple. |
157 | */ |
158 | if (ki == 0) { |
159 | LOG((" inserting on left of 3-node\n")); |
160 | n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2]; |
161 | n->elems[2] = n->elems[1]; |
162 | n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1]; |
163 | n->elems[1] = n->elems[0]; |
164 | n->kids[1] = right; n->counts[1] = rcount; |
165 | n->elems[0] = e; |
166 | n->kids[0] = left; n->counts[0] = lcount; |
167 | } else if (ki == 1) { |
168 | LOG((" inserting in middle of 3-node\n")); |
169 | n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2]; |
170 | n->elems[2] = n->elems[1]; |
171 | n->kids[2] = right; n->counts[2] = rcount; |
172 | n->elems[1] = e; |
173 | n->kids[1] = left; n->counts[1] = lcount; |
174 | } else { /* ki == 2 */ |
175 | LOG((" inserting on right of 3-node\n")); |
176 | n->kids[3] = right; n->counts[3] = rcount; |
177 | n->elems[2] = e; |
178 | n->kids[2] = left; n->counts[2] = lcount; |
179 | } |
180 | if (n->kids[0]) n->kids[0]->parent = n; |
181 | if (n->kids[1]) n->kids[1]->parent = n; |
182 | if (n->kids[2]) n->kids[2]->parent = n; |
183 | if (n->kids[3]) n->kids[3]->parent = n; |
184 | LOG((" done\n")); |
185 | break; |
186 | } else { |
24055572 |
187 | node234 *m = snew(node234); |
720a8fb7 |
188 | m->parent = n->parent; |
189 | LOG((" splitting a 4-node; created new node %p\n", m)); |
190 | /* |
191 | * Insert in a 4-node; split into a 2-node and a |
192 | * 3-node, and move focus up a level. |
193 | * |
194 | * I don't think it matters which way round we put the |
195 | * 2 and the 3. For simplicity, we'll put the 3 first |
196 | * always. |
197 | */ |
198 | if (ki == 0) { |
199 | m->kids[0] = left; m->counts[0] = lcount; |
200 | m->elems[0] = e; |
201 | m->kids[1] = right; m->counts[1] = rcount; |
202 | m->elems[1] = n->elems[0]; |
203 | m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1]; |
204 | e = n->elems[1]; |
205 | n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2]; |
206 | n->elems[0] = n->elems[2]; |
207 | n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; |
208 | } else if (ki == 1) { |
209 | m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; |
210 | m->elems[0] = n->elems[0]; |
211 | m->kids[1] = left; m->counts[1] = lcount; |
212 | m->elems[1] = e; |
213 | m->kids[2] = right; m->counts[2] = rcount; |
214 | e = n->elems[1]; |
215 | n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2]; |
216 | n->elems[0] = n->elems[2]; |
217 | n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; |
218 | } else if (ki == 2) { |
219 | m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; |
220 | m->elems[0] = n->elems[0]; |
221 | m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1]; |
222 | m->elems[1] = n->elems[1]; |
223 | m->kids[2] = left; m->counts[2] = lcount; |
224 | /* e = e; */ |
225 | n->kids[0] = right; n->counts[0] = rcount; |
226 | n->elems[0] = n->elems[2]; |
227 | n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; |
228 | } else { /* ki == 3 */ |
229 | m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; |
230 | m->elems[0] = n->elems[0]; |
231 | m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1]; |
232 | m->elems[1] = n->elems[1]; |
233 | m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2]; |
234 | n->kids[0] = left; n->counts[0] = lcount; |
235 | n->elems[0] = e; |
236 | n->kids[1] = right; n->counts[1] = rcount; |
237 | e = n->elems[2]; |
238 | } |
239 | m->kids[3] = n->kids[3] = n->kids[2] = NULL; |
240 | m->counts[3] = n->counts[3] = n->counts[2] = 0; |
241 | m->elems[2] = n->elems[2] = n->elems[1] = NULL; |
242 | if (m->kids[0]) m->kids[0]->parent = m; |
243 | if (m->kids[1]) m->kids[1]->parent = m; |
244 | if (m->kids[2]) m->kids[2]->parent = m; |
245 | if (n->kids[0]) n->kids[0]->parent = n; |
246 | if (n->kids[1]) n->kids[1]->parent = n; |
247 | LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m, |
248 | m->kids[0], m->counts[0], m->elems[0], |
249 | m->kids[1], m->counts[1], m->elems[1], |
250 | m->kids[2], m->counts[2])); |
251 | LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n, |
252 | n->kids[0], n->counts[0], n->elems[0], |
253 | n->kids[1], n->counts[1])); |
254 | left = m; lcount = countnode234(left); |
255 | right = n; rcount = countnode234(right); |
256 | } |
257 | if (n->parent) |
258 | ki = (n->parent->kids[0] == n ? 0 : |
259 | n->parent->kids[1] == n ? 1 : |
260 | n->parent->kids[2] == n ? 2 : 3); |
261 | n = n->parent; |
262 | } |
263 | |
264 | /* |
265 | * If we've come out of here by `break', n will still be |
266 | * non-NULL and all we need to do is go back up the tree |
267 | * updating counts. If we've come here because n is NULL, we |
268 | * need to create a new root for the tree because the old one |
269 | * has just split into two. */ |
270 | if (n) { |
271 | while (n->parent) { |
272 | int count = countnode234(n); |
273 | int childnum; |
274 | childnum = (n->parent->kids[0] == n ? 0 : |
275 | n->parent->kids[1] == n ? 1 : |
276 | n->parent->kids[2] == n ? 2 : 3); |
277 | n->parent->counts[childnum] = count; |
278 | n = n->parent; |
279 | } |
280 | return 0; /* root unchanged */ |
281 | } else { |
282 | LOG((" root is overloaded, split into two\n")); |
24055572 |
283 | (*root) = snew(node234); |
720a8fb7 |
284 | (*root)->kids[0] = left; (*root)->counts[0] = lcount; |
285 | (*root)->elems[0] = e; |
286 | (*root)->kids[1] = right; (*root)->counts[1] = rcount; |
287 | (*root)->elems[1] = NULL; |
288 | (*root)->kids[2] = NULL; (*root)->counts[2] = 0; |
289 | (*root)->elems[2] = NULL; |
290 | (*root)->kids[3] = NULL; (*root)->counts[3] = 0; |
291 | (*root)->parent = NULL; |
292 | if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root); |
293 | if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root); |
294 | LOG((" new root is %p/%d \"%s\" %p/%d\n", |
295 | (*root)->kids[0], (*root)->counts[0], |
296 | (*root)->elems[0], |
297 | (*root)->kids[1], (*root)->counts[1])); |
298 | return 1; /* root moved */ |
299 | } |
300 | } |
301 | |
302 | /* |
303 | * Add an element e to a 2-3-4 tree t. Returns e on success, or if |
304 | * an existing element compares equal, returns that. |
305 | */ |
306 | static void *add234_internal(tree234 *t, void *e, int index) { |
307 | node234 *n; |
308 | int ki; |
309 | void *orig_e = e; |
310 | int c; |
311 | |
312 | LOG(("adding element \"%s\" to tree %p\n", e, t)); |
313 | if (t->root == NULL) { |
24055572 |
314 | t->root = snew(node234); |
720a8fb7 |
315 | t->root->elems[1] = t->root->elems[2] = NULL; |
316 | t->root->kids[0] = t->root->kids[1] = NULL; |
317 | t->root->kids[2] = t->root->kids[3] = NULL; |
318 | t->root->counts[0] = t->root->counts[1] = 0; |
319 | t->root->counts[2] = t->root->counts[3] = 0; |
320 | t->root->parent = NULL; |
321 | t->root->elems[0] = e; |
322 | LOG((" created root %p\n", t->root)); |
323 | return orig_e; |
324 | } |
325 | |
326 | n = t->root; |
327 | while (n) { |
328 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
329 | n, |
330 | n->kids[0], n->counts[0], n->elems[0], |
331 | n->kids[1], n->counts[1], n->elems[1], |
332 | n->kids[2], n->counts[2], n->elems[2], |
333 | n->kids[3], n->counts[3])); |
334 | if (index >= 0) { |
335 | if (!n->kids[0]) { |
336 | /* |
337 | * Leaf node. We want to insert at kid position |
338 | * equal to the index: |
339 | * |
340 | * 0 A 1 B 2 C 3 |
341 | */ |
342 | ki = index; |
343 | } else { |
344 | /* |
345 | * Internal node. We always descend through it (add |
346 | * always starts at the bottom, never in the |
347 | * middle). |
348 | */ |
349 | if (index <= n->counts[0]) { |
350 | ki = 0; |
351 | } else if (index -= n->counts[0] + 1, index <= n->counts[1]) { |
352 | ki = 1; |
353 | } else if (index -= n->counts[1] + 1, index <= n->counts[2]) { |
354 | ki = 2; |
355 | } else if (index -= n->counts[2] + 1, index <= n->counts[3]) { |
356 | ki = 3; |
357 | } else |
358 | return NULL; /* error: index out of range */ |
359 | } |
360 | } else { |
361 | if ((c = t->cmp(e, n->elems[0])) < 0) |
362 | ki = 0; |
363 | else if (c == 0) |
364 | return n->elems[0]; /* already exists */ |
365 | else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0) |
366 | ki = 1; |
367 | else if (c == 0) |
368 | return n->elems[1]; /* already exists */ |
369 | else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0) |
370 | ki = 2; |
371 | else if (c == 0) |
372 | return n->elems[2]; /* already exists */ |
373 | else |
374 | ki = 3; |
375 | } |
376 | LOG((" moving to child %d (%p)\n", ki, n->kids[ki])); |
377 | if (!n->kids[ki]) |
378 | break; |
379 | n = n->kids[ki]; |
380 | } |
381 | |
382 | add234_insert(NULL, e, NULL, &t->root, n, ki); |
383 | |
384 | return orig_e; |
385 | } |
386 | |
387 | void *add234(tree234 *t, void *e) { |
388 | if (!t->cmp) /* tree is unsorted */ |
389 | return NULL; |
390 | |
391 | return add234_internal(t, e, -1); |
392 | } |
393 | void *addpos234(tree234 *t, void *e, int index) { |
394 | if (index < 0 || /* index out of range */ |
395 | t->cmp) /* tree is sorted */ |
396 | return NULL; /* return failure */ |
397 | |
398 | return add234_internal(t, e, index); /* this checks the upper bound */ |
399 | } |
400 | |
401 | /* |
402 | * Look up the element at a given numeric index in a 2-3-4 tree. |
403 | * Returns NULL if the index is out of range. |
404 | */ |
405 | void *index234(tree234 *t, int index) { |
406 | node234 *n; |
407 | |
408 | if (!t->root) |
409 | return NULL; /* tree is empty */ |
410 | |
411 | if (index < 0 || index >= countnode234(t->root)) |
412 | return NULL; /* out of range */ |
413 | |
414 | n = t->root; |
415 | |
416 | while (n) { |
417 | if (index < n->counts[0]) |
418 | n = n->kids[0]; |
419 | else if (index -= n->counts[0] + 1, index < 0) |
420 | return n->elems[0]; |
421 | else if (index < n->counts[1]) |
422 | n = n->kids[1]; |
423 | else if (index -= n->counts[1] + 1, index < 0) |
424 | return n->elems[1]; |
425 | else if (index < n->counts[2]) |
426 | n = n->kids[2]; |
427 | else if (index -= n->counts[2] + 1, index < 0) |
428 | return n->elems[2]; |
429 | else |
430 | n = n->kids[3]; |
431 | } |
432 | |
433 | /* We shouldn't ever get here. I wonder how we did. */ |
434 | return NULL; |
435 | } |
436 | |
437 | /* |
438 | * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not |
439 | * found. e is always passed as the first argument to cmp, so cmp |
440 | * can be an asymmetric function if desired. cmp can also be passed |
441 | * as NULL, in which case the compare function from the tree proper |
442 | * will be used. |
443 | */ |
444 | void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, |
445 | int relation, int *index) { |
446 | node234 *n; |
447 | void *ret; |
448 | int c; |
449 | int idx, ecount, kcount, cmpret; |
450 | |
451 | if (t->root == NULL) |
452 | return NULL; |
453 | |
454 | if (cmp == NULL) |
455 | cmp = t->cmp; |
456 | |
457 | n = t->root; |
458 | /* |
459 | * Attempt to find the element itself. |
460 | */ |
461 | idx = 0; |
462 | ecount = -1; |
463 | /* |
464 | * Prepare a fake `cmp' result if e is NULL. |
465 | */ |
466 | cmpret = 0; |
467 | if (e == NULL) { |
468 | assert(relation == REL234_LT || relation == REL234_GT); |
469 | if (relation == REL234_LT) |
470 | cmpret = +1; /* e is a max: always greater */ |
471 | else if (relation == REL234_GT) |
472 | cmpret = -1; /* e is a min: always smaller */ |
473 | } |
474 | while (1) { |
475 | for (kcount = 0; kcount < 4; kcount++) { |
476 | if (kcount >= 3 || n->elems[kcount] == NULL || |
477 | (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) { |
478 | break; |
479 | } |
480 | if (n->kids[kcount]) idx += n->counts[kcount]; |
481 | if (c == 0) { |
482 | ecount = kcount; |
483 | break; |
484 | } |
485 | idx++; |
486 | } |
487 | if (ecount >= 0) |
488 | break; |
489 | if (n->kids[kcount]) |
490 | n = n->kids[kcount]; |
491 | else |
492 | break; |
493 | } |
494 | |
495 | if (ecount >= 0) { |
496 | /* |
497 | * We have found the element we're looking for. It's |
498 | * n->elems[ecount], at tree index idx. If our search |
499 | * relation is EQ, LE or GE we can now go home. |
500 | */ |
501 | if (relation != REL234_LT && relation != REL234_GT) { |
502 | if (index) *index = idx; |
503 | return n->elems[ecount]; |
504 | } |
505 | |
506 | /* |
507 | * Otherwise, we'll do an indexed lookup for the previous |
508 | * or next element. (It would be perfectly possible to |
509 | * implement these search types in a non-counted tree by |
510 | * going back up from where we are, but far more fiddly.) |
511 | */ |
512 | if (relation == REL234_LT) |
513 | idx--; |
514 | else |
515 | idx++; |
516 | } else { |
517 | /* |
518 | * We've found our way to the bottom of the tree and we |
519 | * know where we would insert this node if we wanted to: |
520 | * we'd put it in in place of the (empty) subtree |
521 | * n->kids[kcount], and it would have index idx |
522 | * |
523 | * But the actual element isn't there. So if our search |
524 | * relation is EQ, we're doomed. |
525 | */ |
526 | if (relation == REL234_EQ) |
527 | return NULL; |
528 | |
529 | /* |
530 | * Otherwise, we must do an index lookup for index idx-1 |
531 | * (if we're going left - LE or LT) or index idx (if we're |
532 | * going right - GE or GT). |
533 | */ |
534 | if (relation == REL234_LT || relation == REL234_LE) { |
535 | idx--; |
536 | } |
537 | } |
538 | |
539 | /* |
540 | * We know the index of the element we want; just call index234 |
541 | * to do the rest. This will return NULL if the index is out of |
542 | * bounds, which is exactly what we want. |
543 | */ |
544 | ret = index234(t, idx); |
545 | if (ret && index) *index = idx; |
546 | return ret; |
547 | } |
548 | void *find234(tree234 *t, void *e, cmpfn234 cmp) { |
549 | return findrelpos234(t, e, cmp, REL234_EQ, NULL); |
550 | } |
551 | void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) { |
552 | return findrelpos234(t, e, cmp, relation, NULL); |
553 | } |
554 | void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) { |
555 | return findrelpos234(t, e, cmp, REL234_EQ, index); |
556 | } |
557 | |
558 | /* |
559 | * Tree transformation used in delete and split: move a subtree |
560 | * right, from child ki of a node to the next child. Update k and |
561 | * index so that they still point to the same place in the |
562 | * transformed tree. Assumes the destination child is not full, and |
563 | * that the source child does have a subtree to spare. Can cope if |
564 | * the destination child is undersized. |
565 | * |
566 | * . C . . B . |
567 | * / \ -> / \ |
568 | * [more] a A b B c d D e [more] a A b c C d D e |
569 | * |
570 | * . C . . B . |
571 | * / \ -> / \ |
572 | * [more] a A b B c d [more] a A b c C d |
573 | */ |
574 | static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) { |
575 | node234 *src, *dest; |
576 | int i, srclen, adjust; |
577 | |
578 | src = n->kids[ki]; |
579 | dest = n->kids[ki+1]; |
580 | |
581 | LOG((" trans234_subtree_right(%p, %d):\n", n, ki)); |
582 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
583 | n, |
584 | n->kids[0], n->counts[0], n->elems[0], |
585 | n->kids[1], n->counts[1], n->elems[1], |
586 | n->kids[2], n->counts[2], n->elems[2], |
587 | n->kids[3], n->counts[3])); |
588 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
589 | src, |
590 | src->kids[0], src->counts[0], src->elems[0], |
591 | src->kids[1], src->counts[1], src->elems[1], |
592 | src->kids[2], src->counts[2], src->elems[2], |
593 | src->kids[3], src->counts[3])); |
594 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
595 | dest, |
596 | dest->kids[0], dest->counts[0], dest->elems[0], |
597 | dest->kids[1], dest->counts[1], dest->elems[1], |
598 | dest->kids[2], dest->counts[2], dest->elems[2], |
599 | dest->kids[3], dest->counts[3])); |
600 | /* |
601 | * Move over the rest of the destination node to make space. |
602 | */ |
603 | dest->kids[3] = dest->kids[2]; dest->counts[3] = dest->counts[2]; |
604 | dest->elems[2] = dest->elems[1]; |
605 | dest->kids[2] = dest->kids[1]; dest->counts[2] = dest->counts[1]; |
606 | dest->elems[1] = dest->elems[0]; |
607 | dest->kids[1] = dest->kids[0]; dest->counts[1] = dest->counts[0]; |
608 | |
609 | /* which element to move over */ |
610 | i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0); |
611 | |
612 | dest->elems[0] = n->elems[ki]; |
613 | n->elems[ki] = src->elems[i]; |
614 | src->elems[i] = NULL; |
615 | |
616 | dest->kids[0] = src->kids[i+1]; dest->counts[0] = src->counts[i+1]; |
617 | src->kids[i+1] = NULL; src->counts[i+1] = 0; |
618 | |
619 | if (dest->kids[0]) dest->kids[0]->parent = dest; |
620 | |
621 | adjust = dest->counts[0] + 1; |
622 | |
623 | n->counts[ki] -= adjust; |
624 | n->counts[ki+1] += adjust; |
625 | |
626 | srclen = n->counts[ki]; |
627 | |
628 | if (k) { |
629 | LOG((" before: k,index = %d,%d\n", (*k), (*index))); |
630 | if ((*k) == ki && (*index) > srclen) { |
631 | (*index) -= srclen + 1; |
632 | (*k)++; |
633 | } else if ((*k) == ki+1) { |
634 | (*index) += adjust; |
635 | } |
636 | LOG((" after: k,index = %d,%d\n", (*k), (*index))); |
637 | } |
638 | |
639 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
640 | n, |
641 | n->kids[0], n->counts[0], n->elems[0], |
642 | n->kids[1], n->counts[1], n->elems[1], |
643 | n->kids[2], n->counts[2], n->elems[2], |
644 | n->kids[3], n->counts[3])); |
645 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
646 | src, |
647 | src->kids[0], src->counts[0], src->elems[0], |
648 | src->kids[1], src->counts[1], src->elems[1], |
649 | src->kids[2], src->counts[2], src->elems[2], |
650 | src->kids[3], src->counts[3])); |
651 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
652 | dest, |
653 | dest->kids[0], dest->counts[0], dest->elems[0], |
654 | dest->kids[1], dest->counts[1], dest->elems[1], |
655 | dest->kids[2], dest->counts[2], dest->elems[2], |
656 | dest->kids[3], dest->counts[3])); |
657 | } |
658 | |
659 | /* |
660 | * Tree transformation used in delete and split: move a subtree |
661 | * left, from child ki of a node to the previous child. Update k |
662 | * and index so that they still point to the same place in the |
663 | * transformed tree. Assumes the destination child is not full, and |
664 | * that the source child does have a subtree to spare. Can cope if |
665 | * the destination child is undersized. |
666 | * |
667 | * . B . . C . |
668 | * / \ -> / \ |
669 | * a A b c C d D e [more] a A b B c d D e [more] |
670 | * |
671 | * . A . . B . |
672 | * / \ -> / \ |
673 | * a b B c C d [more] a A b c C d [more] |
674 | */ |
675 | static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) { |
676 | node234 *src, *dest; |
677 | int i, adjust; |
678 | |
679 | src = n->kids[ki]; |
680 | dest = n->kids[ki-1]; |
681 | |
682 | LOG((" trans234_subtree_left(%p, %d):\n", n, ki)); |
683 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
684 | n, |
685 | n->kids[0], n->counts[0], n->elems[0], |
686 | n->kids[1], n->counts[1], n->elems[1], |
687 | n->kids[2], n->counts[2], n->elems[2], |
688 | n->kids[3], n->counts[3])); |
689 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
690 | dest, |
691 | dest->kids[0], dest->counts[0], dest->elems[0], |
692 | dest->kids[1], dest->counts[1], dest->elems[1], |
693 | dest->kids[2], dest->counts[2], dest->elems[2], |
694 | dest->kids[3], dest->counts[3])); |
695 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
696 | src, |
697 | src->kids[0], src->counts[0], src->elems[0], |
698 | src->kids[1], src->counts[1], src->elems[1], |
699 | src->kids[2], src->counts[2], src->elems[2], |
700 | src->kids[3], src->counts[3])); |
701 | |
702 | /* where in dest to put it */ |
703 | i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0); |
704 | dest->elems[i] = n->elems[ki-1]; |
705 | n->elems[ki-1] = src->elems[0]; |
706 | |
707 | dest->kids[i+1] = src->kids[0]; dest->counts[i+1] = src->counts[0]; |
708 | |
709 | if (dest->kids[i+1]) dest->kids[i+1]->parent = dest; |
710 | |
711 | /* |
712 | * Move over the rest of the source node. |
713 | */ |
714 | src->kids[0] = src->kids[1]; src->counts[0] = src->counts[1]; |
715 | src->elems[0] = src->elems[1]; |
716 | src->kids[1] = src->kids[2]; src->counts[1] = src->counts[2]; |
717 | src->elems[1] = src->elems[2]; |
718 | src->kids[2] = src->kids[3]; src->counts[2] = src->counts[3]; |
719 | src->elems[2] = NULL; |
720 | src->kids[3] = NULL; src->counts[3] = 0; |
721 | |
722 | adjust = dest->counts[i+1] + 1; |
723 | |
724 | n->counts[ki] -= adjust; |
725 | n->counts[ki-1] += adjust; |
726 | |
727 | if (k) { |
728 | LOG((" before: k,index = %d,%d\n", (*k), (*index))); |
729 | if ((*k) == ki) { |
730 | (*index) -= adjust; |
731 | if ((*index) < 0) { |
732 | (*index) += n->counts[ki-1] + 1; |
733 | (*k)--; |
734 | } |
735 | } |
736 | LOG((" after: k,index = %d,%d\n", (*k), (*index))); |
737 | } |
738 | |
739 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
740 | n, |
741 | n->kids[0], n->counts[0], n->elems[0], |
742 | n->kids[1], n->counts[1], n->elems[1], |
743 | n->kids[2], n->counts[2], n->elems[2], |
744 | n->kids[3], n->counts[3])); |
745 | LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
746 | dest, |
747 | dest->kids[0], dest->counts[0], dest->elems[0], |
748 | dest->kids[1], dest->counts[1], dest->elems[1], |
749 | dest->kids[2], dest->counts[2], dest->elems[2], |
750 | dest->kids[3], dest->counts[3])); |
751 | LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
752 | src, |
753 | src->kids[0], src->counts[0], src->elems[0], |
754 | src->kids[1], src->counts[1], src->elems[1], |
755 | src->kids[2], src->counts[2], src->elems[2], |
756 | src->kids[3], src->counts[3])); |
757 | } |
758 | |
759 | /* |
760 | * Tree transformation used in delete and split: merge child nodes |
761 | * ki and ki+1 of a node. Update k and index so that they still |
762 | * point to the same place in the transformed tree. Assumes both |
763 | * children _are_ sufficiently small. |
764 | * |
765 | * . B . . |
766 | * / \ -> | |
767 | * a A b c C d a A b B c C d |
768 | * |
769 | * This routine can also cope with either child being undersized: |
770 | * |
771 | * . A . . |
772 | * / \ -> | |
773 | * a b B c a A b B c |
774 | * |
775 | * . A . . |
776 | * / \ -> | |
777 | * a b B c C d a A b B c C d |
778 | */ |
779 | static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) { |
780 | node234 *left, *right; |
781 | int i, leftlen, rightlen, lsize, rsize; |
782 | |
783 | left = n->kids[ki]; leftlen = n->counts[ki]; |
784 | right = n->kids[ki+1]; rightlen = n->counts[ki+1]; |
785 | |
786 | LOG((" trans234_subtree_merge(%p, %d):\n", n, ki)); |
787 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
788 | n, |
789 | n->kids[0], n->counts[0], n->elems[0], |
790 | n->kids[1], n->counts[1], n->elems[1], |
791 | n->kids[2], n->counts[2], n->elems[2], |
792 | n->kids[3], n->counts[3])); |
793 | LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
794 | left, |
795 | left->kids[0], left->counts[0], left->elems[0], |
796 | left->kids[1], left->counts[1], left->elems[1], |
797 | left->kids[2], left->counts[2], left->elems[2], |
798 | left->kids[3], left->counts[3])); |
799 | LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
800 | right, |
801 | right->kids[0], right->counts[0], right->elems[0], |
802 | right->kids[1], right->counts[1], right->elems[1], |
803 | right->kids[2], right->counts[2], right->elems[2], |
804 | right->kids[3], right->counts[3])); |
805 | |
806 | assert(!left->elems[2] && !right->elems[2]); /* neither is large! */ |
807 | lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0); |
808 | rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0); |
809 | |
810 | left->elems[lsize] = n->elems[ki]; |
811 | |
812 | for (i = 0; i < rsize+1; i++) { |
813 | left->kids[lsize+1+i] = right->kids[i]; |
814 | left->counts[lsize+1+i] = right->counts[i]; |
815 | if (left->kids[lsize+1+i]) |
816 | left->kids[lsize+1+i]->parent = left; |
817 | if (i < rsize) |
818 | left->elems[lsize+1+i] = right->elems[i]; |
819 | } |
820 | |
821 | n->counts[ki] += rightlen + 1; |
822 | |
823 | sfree(right); |
824 | |
825 | /* |
826 | * Move the rest of n up by one. |
827 | */ |
828 | for (i = ki+1; i < 3; i++) { |
829 | n->kids[i] = n->kids[i+1]; |
830 | n->counts[i] = n->counts[i+1]; |
831 | } |
832 | for (i = ki; i < 2; i++) { |
833 | n->elems[i] = n->elems[i+1]; |
834 | } |
835 | n->kids[3] = NULL; |
836 | n->counts[3] = 0; |
837 | n->elems[2] = NULL; |
838 | |
839 | if (k) { |
840 | LOG((" before: k,index = %d,%d\n", (*k), (*index))); |
841 | if ((*k) == ki+1) { |
842 | (*k)--; |
843 | (*index) += leftlen + 1; |
844 | } else if ((*k) > ki+1) { |
845 | (*k)--; |
846 | } |
847 | LOG((" after: k,index = %d,%d\n", (*k), (*index))); |
848 | } |
849 | |
850 | LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
851 | n, |
852 | n->kids[0], n->counts[0], n->elems[0], |
853 | n->kids[1], n->counts[1], n->elems[1], |
854 | n->kids[2], n->counts[2], n->elems[2], |
855 | n->kids[3], n->counts[3])); |
856 | LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
857 | left, |
858 | left->kids[0], left->counts[0], left->elems[0], |
859 | left->kids[1], left->counts[1], left->elems[1], |
860 | left->kids[2], left->counts[2], left->elems[2], |
861 | left->kids[3], left->counts[3])); |
862 | |
863 | } |
864 | |
865 | /* |
866 | * Delete an element e in a 2-3-4 tree. Does not free the element, |
867 | * merely removes all links to it from the tree nodes. |
868 | */ |
869 | static void *delpos234_internal(tree234 *t, int index) { |
870 | node234 *n; |
871 | void *retval; |
872 | int ki, i; |
873 | |
874 | retval = NULL; |
875 | |
876 | n = t->root; /* by assumption this is non-NULL */ |
877 | LOG(("deleting item %d from tree %p\n", index, t)); |
878 | while (1) { |
879 | node234 *sub; |
880 | |
881 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", |
882 | n, |
883 | n->kids[0], n->counts[0], n->elems[0], |
884 | n->kids[1], n->counts[1], n->elems[1], |
885 | n->kids[2], n->counts[2], n->elems[2], |
886 | n->kids[3], n->counts[3], |
887 | index)); |
888 | if (index <= n->counts[0]) { |
889 | ki = 0; |
890 | } else if (index -= n->counts[0]+1, index <= n->counts[1]) { |
891 | ki = 1; |
892 | } else if (index -= n->counts[1]+1, index <= n->counts[2]) { |
893 | ki = 2; |
894 | } else if (index -= n->counts[2]+1, index <= n->counts[3]) { |
895 | ki = 3; |
896 | } else { |
897 | assert(0); /* can't happen */ |
898 | } |
899 | |
900 | if (!n->kids[0]) |
901 | break; /* n is a leaf node; we're here! */ |
902 | |
903 | /* |
904 | * Check to see if we've found our target element. If so, |
905 | * we must choose a new target (we'll use the old target's |
906 | * successor, which will be in a leaf), move it into the |
907 | * place of the old one, continue down to the leaf and |
908 | * delete the old copy of the new target. |
909 | */ |
910 | if (index == n->counts[ki]) { |
911 | node234 *m; |
912 | LOG((" found element in internal node, index %d\n", ki)); |
913 | assert(n->elems[ki]); /* must be a kid _before_ an element */ |
914 | ki++; index = 0; |
915 | for (m = n->kids[ki]; m->kids[0]; m = m->kids[0]) |
916 | continue; |
917 | LOG((" replacing with element \"%s\" from leaf node %p\n", |
918 | m->elems[0], m)); |
919 | retval = n->elems[ki-1]; |
920 | n->elems[ki-1] = m->elems[0]; |
921 | } |
922 | |
923 | /* |
924 | * Recurse down to subtree ki. If it has only one element, |
925 | * we have to do some transformation to start with. |
926 | */ |
927 | LOG((" moving to subtree %d\n", ki)); |
928 | sub = n->kids[ki]; |
929 | if (!sub->elems[1]) { |
930 | LOG((" subtree has only one element!\n")); |
931 | if (ki > 0 && n->kids[ki-1]->elems[1]) { |
932 | /* |
933 | * Child ki has only one element, but child |
934 | * ki-1 has two or more. So we need to move a |
935 | * subtree from ki-1 to ki. |
936 | */ |
937 | trans234_subtree_right(n, ki-1, &ki, &index); |
938 | } else if (ki < 3 && n->kids[ki+1] && |
939 | n->kids[ki+1]->elems[1]) { |
940 | /* |
941 | * Child ki has only one element, but ki+1 has |
942 | * two or more. Move a subtree from ki+1 to ki. |
943 | */ |
944 | trans234_subtree_left(n, ki+1, &ki, &index); |
945 | } else { |
946 | /* |
947 | * ki is small with only small neighbours. Pick a |
948 | * neighbour and merge with it. |
949 | */ |
950 | trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index); |
951 | sub = n->kids[ki]; |
952 | |
953 | if (!n->elems[0]) { |
954 | /* |
955 | * The root is empty and needs to be |
956 | * removed. |
957 | */ |
958 | LOG((" shifting root!\n")); |
959 | t->root = sub; |
960 | sub->parent = NULL; |
961 | sfree(n); |
962 | n = NULL; |
963 | } |
964 | } |
965 | } |
966 | |
967 | if (n) |
968 | n->counts[ki]--; |
969 | n = sub; |
970 | } |
971 | |
972 | /* |
973 | * Now n is a leaf node, and ki marks the element number we |
974 | * want to delete. We've already arranged for the leaf to be |
975 | * bigger than minimum size, so let's just go to it. |
976 | */ |
977 | assert(!n->kids[0]); |
978 | if (!retval) |
979 | retval = n->elems[ki]; |
980 | |
981 | for (i = ki; i < 2 && n->elems[i+1]; i++) |
982 | n->elems[i] = n->elems[i+1]; |
983 | n->elems[i] = NULL; |
984 | |
985 | /* |
986 | * It's just possible that we have reduced the leaf to zero |
987 | * size. This can only happen if it was the root - so destroy |
988 | * it and make the tree empty. |
989 | */ |
990 | if (!n->elems[0]) { |
991 | LOG((" removed last element in tree, destroying empty root\n")); |
992 | assert(n == t->root); |
993 | sfree(n); |
994 | t->root = NULL; |
995 | } |
996 | |
997 | return retval; /* finished! */ |
998 | } |
999 | void *delpos234(tree234 *t, int index) { |
1000 | if (index < 0 || index >= countnode234(t->root)) |
1001 | return NULL; |
1002 | return delpos234_internal(t, index); |
1003 | } |
1004 | void *del234(tree234 *t, void *e) { |
1005 | int index; |
1006 | if (!findrelpos234(t, e, NULL, REL234_EQ, &index)) |
1007 | return NULL; /* it wasn't in there anyway */ |
1008 | return delpos234_internal(t, index); /* it's there; delete it. */ |
1009 | } |
1010 | |
1011 | /* |
1012 | * Join two subtrees together with a separator element between |
1013 | * them, given their relative height. |
1014 | * |
1015 | * (Height<0 means the left tree is shorter, >0 means the right |
1016 | * tree is shorter, =0 means (duh) they're equal.) |
1017 | * |
1018 | * It is assumed that any checks needed on the ordering criterion |
1019 | * have _already_ been done. |
1020 | * |
1021 | * The value returned in `height' is 0 or 1 depending on whether the |
1022 | * resulting tree is the same height as the original larger one, or |
1023 | * one higher. |
1024 | */ |
1025 | static node234 *join234_internal(node234 *left, void *sep, |
1026 | node234 *right, int *height) { |
1027 | node234 *root, *node; |
1028 | int relht = *height; |
1029 | int ki; |
1030 | |
1031 | LOG((" join: joining %p \"%s\" %p, relative height is %d\n", |
1032 | left, sep, right, relht)); |
1033 | if (relht == 0) { |
1034 | /* |
1035 | * The trees are the same height. Create a new one-element |
1036 | * root containing the separator and pointers to the two |
1037 | * nodes. |
1038 | */ |
1039 | node234 *newroot; |
24055572 |
1040 | newroot = snew(node234); |
720a8fb7 |
1041 | newroot->kids[0] = left; newroot->counts[0] = countnode234(left); |
1042 | newroot->elems[0] = sep; |
1043 | newroot->kids[1] = right; newroot->counts[1] = countnode234(right); |
1044 | newroot->elems[1] = NULL; |
1045 | newroot->kids[2] = NULL; newroot->counts[2] = 0; |
1046 | newroot->elems[2] = NULL; |
1047 | newroot->kids[3] = NULL; newroot->counts[3] = 0; |
1048 | newroot->parent = NULL; |
1049 | if (left) left->parent = newroot; |
1050 | if (right) right->parent = newroot; |
1051 | *height = 1; |
1052 | LOG((" join: same height, brand new root\n")); |
1053 | return newroot; |
1054 | } |
1055 | |
1056 | /* |
1057 | * This now works like the addition algorithm on the larger |
1058 | * tree. We're replacing a single kid pointer with two kid |
1059 | * pointers separated by an element; if that causes the node to |
1060 | * overload, we split it in two, move a separator element up to |
1061 | * the next node, and repeat. |
1062 | */ |
1063 | if (relht < 0) { |
1064 | /* |
1065 | * Left tree is shorter. Search down the right tree to find |
1066 | * the pointer we're inserting at. |
1067 | */ |
1068 | node = root = right; |
1069 | while (++relht < 0) { |
1070 | node = node->kids[0]; |
1071 | } |
1072 | ki = 0; |
1073 | right = node->kids[ki]; |
1074 | } else { |
1075 | /* |
1076 | * Right tree is shorter; search down the left to find the |
1077 | * pointer we're inserting at. |
1078 | */ |
1079 | node = root = left; |
1080 | while (--relht > 0) { |
1081 | if (node->elems[2]) |
1082 | node = node->kids[3]; |
1083 | else if (node->elems[1]) |
1084 | node = node->kids[2]; |
1085 | else |
1086 | node = node->kids[1]; |
1087 | } |
1088 | if (node->elems[2]) |
1089 | ki = 3; |
1090 | else if (node->elems[1]) |
1091 | ki = 2; |
1092 | else |
1093 | ki = 1; |
1094 | left = node->kids[ki]; |
1095 | } |
1096 | |
1097 | /* |
1098 | * Now proceed as for addition. |
1099 | */ |
1100 | *height = add234_insert(left, sep, right, &root, node, ki); |
1101 | |
1102 | return root; |
1103 | } |
1104 | static int height234(tree234 *t) { |
1105 | int level = 0; |
1106 | node234 *n = t->root; |
1107 | while (n) { |
1108 | level++; |
1109 | n = n->kids[0]; |
1110 | } |
1111 | return level; |
1112 | } |
1113 | tree234 *join234(tree234 *t1, tree234 *t2) { |
1114 | int size2 = countnode234(t2->root); |
1115 | if (size2 > 0) { |
1116 | void *element; |
1117 | int relht; |
1118 | |
1119 | if (t1->cmp) { |
1120 | element = index234(t2, 0); |
1121 | element = findrelpos234(t1, element, NULL, REL234_GE, NULL); |
1122 | if (element) |
1123 | return NULL; |
1124 | } |
1125 | |
1126 | element = delpos234(t2, 0); |
1127 | relht = height234(t1) - height234(t2); |
1128 | t1->root = join234_internal(t1->root, element, t2->root, &relht); |
1129 | t2->root = NULL; |
1130 | } |
1131 | return t1; |
1132 | } |
1133 | tree234 *join234r(tree234 *t1, tree234 *t2) { |
1134 | int size1 = countnode234(t1->root); |
1135 | if (size1 > 0) { |
1136 | void *element; |
1137 | int relht; |
1138 | |
1139 | if (t2->cmp) { |
1140 | element = index234(t1, size1-1); |
1141 | element = findrelpos234(t2, element, NULL, REL234_LE, NULL); |
1142 | if (element) |
1143 | return NULL; |
1144 | } |
1145 | |
1146 | element = delpos234(t1, size1-1); |
1147 | relht = height234(t1) - height234(t2); |
1148 | t2->root = join234_internal(t1->root, element, t2->root, &relht); |
1149 | t1->root = NULL; |
1150 | } |
1151 | return t2; |
1152 | } |
1153 | |
1154 | /* |
1155 | * Split out the first <index> elements in a tree and return a |
1156 | * pointer to the root node. Leave the root node of the remainder |
1157 | * in t. |
1158 | */ |
1159 | static node234 *split234_internal(tree234 *t, int index) { |
1160 | node234 *halves[2], *n, *sib, *sub; |
1161 | node234 *lparent, *rparent; |
1162 | int ki, pki, i, half, lcount, rcount; |
1163 | |
1164 | n = t->root; |
1165 | LOG(("splitting tree %p at point %d\n", t, index)); |
1166 | |
1167 | /* |
1168 | * Easy special cases. After this we have also dealt completely |
1169 | * with the empty-tree case and we can assume the root exists. |
1170 | */ |
1171 | if (index == 0) /* return nothing */ |
1172 | return NULL; |
1173 | if (index == countnode234(t->root)) { /* return the whole tree */ |
1174 | node234 *ret = t->root; |
1175 | t->root = NULL; |
1176 | return ret; |
1177 | } |
1178 | |
1179 | /* |
1180 | * Search down the tree to find the split point. |
1181 | */ |
1182 | lparent = rparent = NULL; |
1183 | pki = -1; |
1184 | while (n) { |
1185 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", |
1186 | n, |
1187 | n->kids[0], n->counts[0], n->elems[0], |
1188 | n->kids[1], n->counts[1], n->elems[1], |
1189 | n->kids[2], n->counts[2], n->elems[2], |
1190 | n->kids[3], n->counts[3], |
1191 | index)); |
1192 | lcount = index; |
1193 | rcount = countnode234(n) - lcount; |
1194 | if (index <= n->counts[0]) { |
1195 | ki = 0; |
1196 | } else if (index -= n->counts[0]+1, index <= n->counts[1]) { |
1197 | ki = 1; |
1198 | } else if (index -= n->counts[1]+1, index <= n->counts[2]) { |
1199 | ki = 2; |
1200 | } else { |
1201 | index -= n->counts[2]+1; |
1202 | ki = 3; |
1203 | } |
1204 | |
1205 | LOG((" splitting at subtree %d\n", ki)); |
1206 | sub = n->kids[ki]; |
1207 | |
1208 | LOG((" splitting at child index %d\n", ki)); |
1209 | |
1210 | /* |
1211 | * Split the node, put halves[0] on the right of the left |
1212 | * one and halves[1] on the left of the right one, put the |
1213 | * new node pointers in halves[0] and halves[1], and go up |
1214 | * a level. |
1215 | */ |
24055572 |
1216 | sib = snew(node234); |
720a8fb7 |
1217 | for (i = 0; i < 3; i++) { |
1218 | if (i+ki < 3 && n->elems[i+ki]) { |
1219 | sib->elems[i] = n->elems[i+ki]; |
1220 | sib->kids[i+1] = n->kids[i+ki+1]; |
1221 | if (sib->kids[i+1]) sib->kids[i+1]->parent = sib; |
1222 | sib->counts[i+1] = n->counts[i+ki+1]; |
1223 | n->elems[i+ki] = NULL; |
1224 | n->kids[i+ki+1] = NULL; |
1225 | n->counts[i+ki+1] = 0; |
1226 | } else { |
1227 | sib->elems[i] = NULL; |
1228 | sib->kids[i+1] = NULL; |
1229 | sib->counts[i+1] = 0; |
1230 | } |
1231 | } |
1232 | if (lparent) { |
1233 | lparent->kids[pki] = n; |
1234 | lparent->counts[pki] = lcount; |
1235 | n->parent = lparent; |
1236 | rparent->kids[0] = sib; |
1237 | rparent->counts[0] = rcount; |
1238 | sib->parent = rparent; |
1239 | } else { |
1240 | halves[0] = n; |
1241 | n->parent = NULL; |
1242 | halves[1] = sib; |
1243 | sib->parent = NULL; |
1244 | } |
1245 | lparent = n; |
1246 | rparent = sib; |
1247 | pki = ki; |
1248 | LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
1249 | n, |
1250 | n->kids[0], n->counts[0], n->elems[0], |
1251 | n->kids[1], n->counts[1], n->elems[1], |
1252 | n->kids[2], n->counts[2], n->elems[2], |
1253 | n->kids[3], n->counts[3])); |
1254 | LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
1255 | sib, |
1256 | sib->kids[0], sib->counts[0], sib->elems[0], |
1257 | sib->kids[1], sib->counts[1], sib->elems[1], |
1258 | sib->kids[2], sib->counts[2], sib->elems[2], |
1259 | sib->kids[3], sib->counts[3])); |
1260 | |
1261 | n = sub; |
1262 | } |
1263 | |
1264 | /* |
1265 | * We've come off the bottom here, so we've successfully split |
1266 | * the tree into two equally high subtrees. The only problem is |
1267 | * that some of the nodes down the fault line will be smaller |
1268 | * than the minimum permitted size. (Since this is a 2-3-4 |
1269 | * tree, that means they'll be zero-element one-child nodes.) |
1270 | */ |
1271 | LOG((" fell off bottom, lroot is %p, rroot is %p\n", |
1272 | halves[0], halves[1])); |
1273 | lparent->counts[pki] = rparent->counts[0] = 0; |
1274 | lparent->kids[pki] = rparent->kids[0] = NULL; |
1275 | |
1276 | /* |
1277 | * So now we go back down the tree from each of the two roots, |
1278 | * fixing up undersize nodes. |
1279 | */ |
1280 | for (half = 0; half < 2; half++) { |
1281 | /* |
1282 | * Remove the root if it's undersize (it will contain only |
1283 | * one child pointer, so just throw it away and replace it |
1284 | * with its child). This might happen several times. |
1285 | */ |
1286 | while (halves[half] && !halves[half]->elems[0]) { |
1287 | LOG((" root %p is undersize, throwing away\n", halves[half])); |
1288 | halves[half] = halves[half]->kids[0]; |
1289 | sfree(halves[half]->parent); |
1290 | halves[half]->parent = NULL; |
1291 | LOG((" new root is %p\n", halves[half])); |
1292 | } |
1293 | |
1294 | n = halves[half]; |
1295 | while (n) { |
1296 | void (*toward)(node234 *n, int ki, int *k, int *index); |
1297 | int ni, merge; |
1298 | |
1299 | /* |
1300 | * Now we have a potentially undersize node on the |
1301 | * right (if half==0) or left (if half==1). Sort it |
1302 | * out, by merging with a neighbour or by transferring |
1303 | * subtrees over. At this time we must also ensure that |
1304 | * nodes are bigger than minimum, in case we need an |
1305 | * element to merge two nodes below. |
1306 | */ |
1307 | LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", |
1308 | n, |
1309 | n->kids[0], n->counts[0], n->elems[0], |
1310 | n->kids[1], n->counts[1], n->elems[1], |
1311 | n->kids[2], n->counts[2], n->elems[2], |
1312 | n->kids[3], n->counts[3])); |
1313 | if (half == 1) { |
1314 | ki = 0; /* the kid we're interested in */ |
1315 | ni = 1; /* the neighbour */ |
1316 | merge = 0; /* for merge: leftmost of the two */ |
1317 | toward = trans234_subtree_left; |
1318 | } else { |
1319 | ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1); |
1320 | ni = ki-1; |
1321 | merge = ni; |
1322 | toward = trans234_subtree_right; |
1323 | } |
1324 | |
1325 | sub = n->kids[ki]; |
1326 | if (sub && !sub->elems[1]) { |
1327 | /* |
1328 | * This node is undersized or minimum-size. If we |
1329 | * can merge it with its neighbour, we do so; |
1330 | * otherwise we must be able to transfer subtrees |
1331 | * over to it until it is greater than minimum |
1332 | * size. |
1333 | */ |
1334 | int undersized = (!sub->elems[0]); |
1335 | LOG((" child %d is %ssize\n", ki, |
1336 | undersized ? "under" : "minimum-")); |
1337 | LOG((" neighbour is %s\n", |
1338 | n->kids[ni]->elems[2] ? "large" : |
1339 | n->kids[ni]->elems[1] ? "medium" : "small")); |
1340 | if (!n->kids[ni]->elems[1] || |
1341 | (undersized && !n->kids[ni]->elems[2])) { |
1342 | /* |
1343 | * Neighbour is small, or possibly neighbour is |
1344 | * medium and we are undersize. |
1345 | */ |
1346 | trans234_subtree_merge(n, merge, NULL, NULL); |
1347 | sub = n->kids[merge]; |
1348 | if (!n->elems[0]) { |
1349 | /* |
1350 | * n is empty, and hence must have been the |
1351 | * root and needs to be removed. |
1352 | */ |
1353 | assert(!n->parent); |
1354 | LOG((" shifting root!\n")); |
1355 | halves[half] = sub; |
1356 | halves[half]->parent = NULL; |
1357 | sfree(n); |
1358 | } |
1359 | } else { |
1360 | /* Neighbour is big enough to move trees over. */ |
1361 | toward(n, ni, NULL, NULL); |
1362 | if (undersized) |
1363 | toward(n, ni, NULL, NULL); |
1364 | } |
1365 | } |
1366 | n = sub; |
1367 | } |
1368 | } |
1369 | |
1370 | t->root = halves[1]; |
1371 | return halves[0]; |
1372 | } |
1373 | tree234 *splitpos234(tree234 *t, int index, int before) { |
1374 | tree234 *ret; |
1375 | node234 *n; |
1376 | int count; |
1377 | |
1378 | count = countnode234(t->root); |
1379 | if (index < 0 || index > count) |
1380 | return NULL; /* error */ |
1381 | ret = newtree234(t->cmp); |
1382 | n = split234_internal(t, index); |
1383 | if (before) { |
1384 | /* We want to return the ones before the index. */ |
1385 | ret->root = n; |
1386 | } else { |
1387 | /* |
1388 | * We want to keep the ones before the index and return the |
1389 | * ones after. |
1390 | */ |
1391 | ret->root = t->root; |
1392 | t->root = n; |
1393 | } |
1394 | return ret; |
1395 | } |
1396 | tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) { |
1397 | int before; |
1398 | int index; |
1399 | |
1400 | assert(rel != REL234_EQ); |
1401 | |
1402 | if (rel == REL234_GT || rel == REL234_GE) { |
1403 | before = 1; |
1404 | rel = (rel == REL234_GT ? REL234_LE : REL234_LT); |
1405 | } else { |
1406 | before = 0; |
1407 | } |
1408 | if (!findrelpos234(t, e, cmp, rel, &index)) |
1409 | index = 0; |
1410 | |
1411 | return splitpos234(t, index+1, before); |
1412 | } |
1413 | |
1414 | static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) { |
1415 | int i; |
24055572 |
1416 | node234 *n2 = snew(node234); |
720a8fb7 |
1417 | |
1418 | for (i = 0; i < 3; i++) { |
1419 | if (n->elems[i] && copyfn) |
1420 | n2->elems[i] = copyfn(copyfnstate, n->elems[i]); |
1421 | else |
1422 | n2->elems[i] = n->elems[i]; |
1423 | } |
1424 | |
1425 | for (i = 0; i < 4; i++) { |
1426 | if (n->kids[i]) { |
1427 | n2->kids[i] = copynode234(n->kids[i], copyfn, copyfnstate); |
1428 | n2->kids[i]->parent = n2; |
1429 | } else { |
1430 | n2->kids[i] = NULL; |
1431 | } |
1432 | n2->counts[i] = n->counts[i]; |
1433 | } |
1434 | |
1435 | return n2; |
1436 | } |
1437 | tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) { |
1438 | tree234 *t2; |
1439 | |
1440 | t2 = newtree234(t->cmp); |
1441 | t2->root = copynode234(t->root, copyfn, copyfnstate); |
1442 | t2->root->parent = NULL; |
1443 | |
1444 | return t2; |
1445 | } |
1446 | |
1447 | #ifdef TEST |
1448 | |
1449 | /* |
1450 | * Test code for the 2-3-4 tree. This code maintains an alternative |
1451 | * representation of the data in the tree, in an array (using the |
1452 | * obvious and slow insert and delete functions). After each tree |
1453 | * operation, the verify() function is called, which ensures all |
1454 | * the tree properties are preserved: |
1455 | * - node->child->parent always equals node |
1456 | * - tree->root->parent always equals NULL |
1457 | * - number of kids == 0 or number of elements + 1; |
1458 | * - tree has the same depth everywhere |
1459 | * - every node has at least one element |
1460 | * - subtree element counts are accurate |
1461 | * - any NULL kid pointer is accompanied by a zero count |
1462 | * - in a sorted tree: ordering property between elements of a |
1463 | * node and elements of its children is preserved |
1464 | * and also ensures the list represented by the tree is the same |
1465 | * list it should be. (This last check also doubly verifies the |
1466 | * ordering properties, because the `same list it should be' is by |
1467 | * definition correctly ordered. It also ensures all nodes are |
1468 | * distinct, because the enum functions would get caught in a loop |
1469 | * if not.) |
1470 | */ |
1471 | |
1472 | #include <stdarg.h> |
1473 | |
1474 | #define srealloc realloc |
1475 | |
1476 | /* |
1477 | * Error reporting function. |
1478 | */ |
1479 | void error(char *fmt, ...) { |
1480 | va_list ap; |
1481 | printf("ERROR: "); |
1482 | va_start(ap, fmt); |
1483 | vfprintf(stdout, fmt, ap); |
1484 | va_end(ap); |
1485 | printf("\n"); |
1486 | } |
1487 | |
1488 | /* The array representation of the data. */ |
1489 | void **array; |
1490 | int arraylen, arraysize; |
1491 | cmpfn234 cmp; |
1492 | |
1493 | /* The tree representation of the same data. */ |
1494 | tree234 *tree; |
1495 | |
1496 | /* |
1497 | * Routines to provide a diagnostic printout of a tree. Currently |
1498 | * relies on every element in the tree being a one-character string |
1499 | * :-) |
1500 | */ |
1501 | typedef struct { |
1502 | char **levels; |
1503 | } dispctx; |
1504 | |
1505 | int dispnode(node234 *n, int level, dispctx *ctx) { |
1506 | if (level == 0) { |
1507 | int xpos = strlen(ctx->levels[0]); |
1508 | int len; |
1509 | |
1510 | if (n->elems[2]) |
1511 | len = sprintf(ctx->levels[0]+xpos, " %s%s%s", |
1512 | n->elems[0], n->elems[1], n->elems[2]); |
1513 | else if (n->elems[1]) |
1514 | len = sprintf(ctx->levels[0]+xpos, " %s%s", |
1515 | n->elems[0], n->elems[1]); |
1516 | else |
1517 | len = sprintf(ctx->levels[0]+xpos, " %s", |
1518 | n->elems[0]); |
1519 | return xpos + 1 + (len-1) / 2; |
1520 | } else { |
1521 | int xpos[4], nkids; |
1522 | int nodelen, mypos, myleft, x, i; |
1523 | |
1524 | xpos[0] = dispnode(n->kids[0], level-3, ctx); |
1525 | xpos[1] = dispnode(n->kids[1], level-3, ctx); |
1526 | nkids = 2; |
1527 | if (n->kids[2]) { |
1528 | xpos[2] = dispnode(n->kids[2], level-3, ctx); |
1529 | nkids = 3; |
1530 | } |
1531 | if (n->kids[3]) { |
1532 | xpos[3] = dispnode(n->kids[3], level-3, ctx); |
1533 | nkids = 4; |
1534 | } |
1535 | |
1536 | if (nkids == 4) |
1537 | mypos = (xpos[1] + xpos[2]) / 2; |
1538 | else if (nkids == 3) |
1539 | mypos = xpos[1]; |
1540 | else |
1541 | mypos = (xpos[0] + xpos[1]) / 2; |
1542 | nodelen = nkids * 2 - 1; |
1543 | myleft = mypos - ((nodelen-1)/2); |
1544 | assert(myleft >= xpos[0]); |
1545 | assert(myleft + nodelen-1 <= xpos[nkids-1]); |
1546 | |
1547 | x = strlen(ctx->levels[level]); |
1548 | while (x <= xpos[0] && x < myleft) |
1549 | ctx->levels[level][x++] = ' '; |
1550 | while (x < myleft) |
1551 | ctx->levels[level][x++] = '_'; |
1552 | if (nkids==4) |
1553 | x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.", |
1554 | n->elems[0], n->elems[1], n->elems[2]); |
1555 | else if (nkids==3) |
1556 | x += sprintf(ctx->levels[level]+x, ".%s.%s.", |
1557 | n->elems[0], n->elems[1]); |
1558 | else |
1559 | x += sprintf(ctx->levels[level]+x, ".%s.", |
1560 | n->elems[0]); |
1561 | while (x < xpos[nkids-1]) |
1562 | ctx->levels[level][x++] = '_'; |
1563 | ctx->levels[level][x] = '\0'; |
1564 | |
1565 | x = strlen(ctx->levels[level-1]); |
1566 | for (i = 0; i < nkids; i++) { |
1567 | int rpos, pos; |
1568 | rpos = xpos[i]; |
1569 | if (i > 0 && i < nkids-1) |
1570 | pos = myleft + 2*i; |
1571 | else |
1572 | pos = rpos; |
1573 | if (rpos < pos) |
1574 | rpos++; |
1575 | while (x < pos && x < rpos) |
1576 | ctx->levels[level-1][x++] = ' '; |
1577 | if (x == pos) |
1578 | ctx->levels[level-1][x++] = '|'; |
1579 | while (x < pos || x < rpos) |
1580 | ctx->levels[level-1][x++] = '_'; |
1581 | if (x == pos) |
1582 | ctx->levels[level-1][x++] = '|'; |
1583 | } |
1584 | ctx->levels[level-1][x] = '\0'; |
1585 | |
1586 | x = strlen(ctx->levels[level-2]); |
1587 | for (i = 0; i < nkids; i++) { |
1588 | int rpos = xpos[i]; |
1589 | |
1590 | while (x < rpos) |
1591 | ctx->levels[level-2][x++] = ' '; |
1592 | ctx->levels[level-2][x++] = '|'; |
1593 | } |
1594 | ctx->levels[level-2][x] = '\0'; |
1595 | |
1596 | return mypos; |
1597 | } |
1598 | } |
1599 | |
1600 | void disptree(tree234 *t) { |
1601 | dispctx ctx; |
1602 | char *leveldata; |
1603 | int width = count234(t); |
1604 | int ht = height234(t) * 3 - 2; |
1605 | int i; |
1606 | |
1607 | if (!t->root) { |
1608 | printf("[empty tree]\n"); |
1609 | } |
1610 | |
1611 | leveldata = smalloc(ht * (width+2)); |
1612 | ctx.levels = smalloc(ht * sizeof(char *)); |
1613 | for (i = 0; i < ht; i++) { |
1614 | ctx.levels[i] = leveldata + i * (width+2); |
1615 | ctx.levels[i][0] = '\0'; |
1616 | } |
1617 | |
1618 | (void) dispnode(t->root, ht-1, &ctx); |
1619 | |
1620 | for (i = ht; i-- ;) |
1621 | printf("%s\n", ctx.levels[i]); |
1622 | |
1623 | sfree(ctx.levels); |
1624 | sfree(leveldata); |
1625 | } |
1626 | |
1627 | typedef struct { |
1628 | int treedepth; |
1629 | int elemcount; |
1630 | } chkctx; |
1631 | |
1632 | int chknode(chkctx *ctx, int level, node234 *node, |
1633 | void *lowbound, void *highbound) { |
1634 | int nkids, nelems; |
1635 | int i; |
1636 | int count; |
1637 | |
1638 | /* Count the non-NULL kids. */ |
1639 | for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++); |
1640 | /* Ensure no kids beyond the first NULL are non-NULL. */ |
1641 | for (i = nkids; i < 4; i++) |
1642 | if (node->kids[i]) { |
1643 | error("node %p: nkids=%d but kids[%d] non-NULL", |
1644 | node, nkids, i); |
1645 | } else if (node->counts[i]) { |
1646 | error("node %p: kids[%d] NULL but count[%d]=%d nonzero", |
1647 | node, i, i, node->counts[i]); |
1648 | } |
1649 | |
1650 | /* Count the non-NULL elements. */ |
1651 | for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++); |
1652 | /* Ensure no elements beyond the first NULL are non-NULL. */ |
1653 | for (i = nelems; i < 3; i++) |
1654 | if (node->elems[i]) { |
1655 | error("node %p: nelems=%d but elems[%d] non-NULL", |
1656 | node, nelems, i); |
1657 | } |
1658 | |
1659 | if (nkids == 0) { |
1660 | /* |
1661 | * If nkids==0, this is a leaf node; verify that the tree |
1662 | * depth is the same everywhere. |
1663 | */ |
1664 | if (ctx->treedepth < 0) |
1665 | ctx->treedepth = level; /* we didn't know the depth yet */ |
1666 | else if (ctx->treedepth != level) |
1667 | error("node %p: leaf at depth %d, previously seen depth %d", |
1668 | node, level, ctx->treedepth); |
1669 | } else { |
1670 | /* |
1671 | * If nkids != 0, then it should be nelems+1, unless nelems |
1672 | * is 0 in which case nkids should also be 0 (and so we |
1673 | * shouldn't be in this condition at all). |
1674 | */ |
1675 | int shouldkids = (nelems ? nelems+1 : 0); |
1676 | if (nkids != shouldkids) { |
1677 | error("node %p: %d elems should mean %d kids but has %d", |
1678 | node, nelems, shouldkids, nkids); |
1679 | } |
1680 | } |
1681 | |
1682 | /* |
1683 | * nelems should be at least 1. |
1684 | */ |
1685 | if (nelems == 0) { |
1686 | error("node %p: no elems", node, nkids); |
1687 | } |
1688 | |
1689 | /* |
1690 | * Add nelems to the running element count of the whole tree. |
1691 | */ |
1692 | ctx->elemcount += nelems; |
1693 | |
1694 | /* |
1695 | * Check ordering property: all elements should be strictly > |
1696 | * lowbound, strictly < highbound, and strictly < each other in |
1697 | * sequence. (lowbound and highbound are NULL at edges of tree |
1698 | * - both NULL at root node - and NULL is considered to be < |
1699 | * everything and > everything. IYSWIM.) |
1700 | */ |
1701 | if (cmp) { |
1702 | for (i = -1; i < nelems; i++) { |
1703 | void *lower = (i == -1 ? lowbound : node->elems[i]); |
1704 | void *higher = (i+1 == nelems ? highbound : node->elems[i+1]); |
1705 | if (lower && higher && cmp(lower, higher) >= 0) { |
1706 | error("node %p: kid comparison [%d=%s,%d=%s] failed", |
1707 | node, i, lower, i+1, higher); |
1708 | } |
1709 | } |
1710 | } |
1711 | |
1712 | /* |
1713 | * Check parent pointers: all non-NULL kids should have a |
1714 | * parent pointer coming back to this node. |
1715 | */ |
1716 | for (i = 0; i < nkids; i++) |
1717 | if (node->kids[i]->parent != node) { |
1718 | error("node %p kid %d: parent ptr is %p not %p", |
1719 | node, i, node->kids[i]->parent, node); |
1720 | } |
1721 | |
1722 | |
1723 | /* |
1724 | * Now (finally!) recurse into subtrees. |
1725 | */ |
1726 | count = nelems; |
1727 | |
1728 | for (i = 0; i < nkids; i++) { |
1729 | void *lower = (i == 0 ? lowbound : node->elems[i-1]); |
1730 | void *higher = (i >= nelems ? highbound : node->elems[i]); |
1731 | int subcount = chknode(ctx, level+1, node->kids[i], lower, higher); |
1732 | if (node->counts[i] != subcount) { |
1733 | error("node %p kid %d: count says %d, subtree really has %d", |
1734 | node, i, node->counts[i], subcount); |
1735 | } |
1736 | count += subcount; |
1737 | } |
1738 | |
1739 | return count; |
1740 | } |
1741 | |
1742 | void verifytree(tree234 *tree, void **array, int arraylen) { |
1743 | chkctx ctx; |
1744 | int i; |
1745 | void *p; |
1746 | |
1747 | ctx.treedepth = -1; /* depth unknown yet */ |
1748 | ctx.elemcount = 0; /* no elements seen yet */ |
1749 | /* |
1750 | * Verify validity of tree properties. |
1751 | */ |
1752 | if (tree->root) { |
1753 | if (tree->root->parent != NULL) |
1754 | error("root->parent is %p should be null", tree->root->parent); |
1755 | chknode(&ctx, 0, tree->root, NULL, NULL); |
1756 | } |
1757 | printf("tree depth: %d\n", ctx.treedepth); |
1758 | /* |
1759 | * Enumerate the tree and ensure it matches up to the array. |
1760 | */ |
1761 | for (i = 0; NULL != (p = index234(tree, i)); i++) { |
1762 | if (i >= arraylen) |
1763 | error("tree contains more than %d elements", arraylen); |
1764 | if (array[i] != p) |
1765 | error("enum at position %d: array says %s, tree says %s", |
1766 | i, array[i], p); |
1767 | } |
1768 | if (ctx.elemcount != i) { |
1769 | error("tree really contains %d elements, enum gave %d", |
1770 | ctx.elemcount, i); |
1771 | } |
1772 | if (i < arraylen) { |
1773 | error("enum gave only %d elements, array has %d", i, arraylen); |
1774 | } |
1775 | i = count234(tree); |
1776 | if (ctx.elemcount != i) { |
1777 | error("tree really contains %d elements, count234 gave %d", |
1778 | ctx.elemcount, i); |
1779 | } |
1780 | } |
1781 | void verify(void) { verifytree(tree, array, arraylen); } |
1782 | |
1783 | void internal_addtest(void *elem, int index, void *realret) { |
1784 | int i, j; |
1785 | void *retval; |
1786 | |
1787 | if (arraysize < arraylen+1) { |
1788 | arraysize = arraylen+1+256; |
1789 | array = (array == NULL ? smalloc(arraysize*sizeof(*array)) : |
1790 | srealloc(array, arraysize*sizeof(*array))); |
1791 | } |
1792 | |
1793 | i = index; |
1794 | /* now i points to the first element >= elem */ |
1795 | retval = elem; /* expect elem returned (success) */ |
1796 | for (j = arraylen; j > i; j--) |
1797 | array[j] = array[j-1]; |
1798 | array[i] = elem; /* add elem to array */ |
1799 | arraylen++; |
1800 | |
1801 | if (realret != retval) { |
1802 | error("add: retval was %p expected %p", realret, retval); |
1803 | } |
1804 | |
1805 | verify(); |
1806 | } |
1807 | |
1808 | void addtest(void *elem) { |
1809 | int i; |
1810 | void *realret; |
1811 | |
1812 | realret = add234(tree, elem); |
1813 | |
1814 | i = 0; |
1815 | while (i < arraylen && cmp(elem, array[i]) > 0) |
1816 | i++; |
1817 | if (i < arraylen && !cmp(elem, array[i])) { |
1818 | void *retval = array[i]; /* expect that returned not elem */ |
1819 | if (realret != retval) { |
1820 | error("add: retval was %p expected %p", realret, retval); |
1821 | } |
1822 | } else |
1823 | internal_addtest(elem, i, realret); |
1824 | } |
1825 | |
1826 | void addpostest(void *elem, int i) { |
1827 | void *realret; |
1828 | |
1829 | realret = addpos234(tree, elem, i); |
1830 | |
1831 | internal_addtest(elem, i, realret); |
1832 | } |
1833 | |
1834 | void delpostest(int i) { |
1835 | int index = i; |
1836 | void *elem = array[i], *ret; |
1837 | |
1838 | /* i points to the right element */ |
1839 | while (i < arraylen-1) { |
1840 | array[i] = array[i+1]; |
1841 | i++; |
1842 | } |
1843 | arraylen--; /* delete elem from array */ |
1844 | |
1845 | if (tree->cmp) |
1846 | ret = del234(tree, elem); |
1847 | else |
1848 | ret = delpos234(tree, index); |
1849 | |
1850 | if (ret != elem) { |
1851 | error("del returned %p, expected %p", ret, elem); |
1852 | } |
1853 | |
1854 | verify(); |
1855 | } |
1856 | |
1857 | void deltest(void *elem) { |
1858 | int i; |
1859 | |
1860 | i = 0; |
1861 | while (i < arraylen && cmp(elem, array[i]) > 0) |
1862 | i++; |
1863 | if (i >= arraylen || cmp(elem, array[i]) != 0) |
1864 | return; /* don't do it! */ |
1865 | delpostest(i); |
1866 | } |
1867 | |
1868 | /* A sample data set and test utility. Designed for pseudo-randomness, |
1869 | * and yet repeatability. */ |
1870 | |
1871 | /* |
1872 | * This random number generator uses the `portable implementation' |
1873 | * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits; |
1874 | * change it if not. |
1875 | */ |
1876 | int randomnumber(unsigned *seed) { |
1877 | *seed *= 1103515245; |
1878 | *seed += 12345; |
1879 | return ((*seed) / 65536) % 32768; |
1880 | } |
1881 | |
1882 | int mycmp(void *av, void *bv) { |
1883 | char const *a = (char const *)av; |
1884 | char const *b = (char const *)bv; |
1885 | return strcmp(a, b); |
1886 | } |
1887 | |
1888 | #define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) |
1889 | |
1890 | char *strings[] = { |
1891 | "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i", |
1892 | "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E", |
1893 | "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u", |
1894 | "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y", |
1895 | "m", "s", "l", "4", |
1896 | #if 0 |
1897 | "a", "ab", "absque", "coram", "de", |
1898 | "palam", "clam", "cum", "ex", "e", |
1899 | "sine", "tenus", "pro", "prae", |
1900 | "banana", "carrot", "cabbage", "broccoli", "onion", "zebra", |
1901 | "penguin", "blancmange", "pangolin", "whale", "hedgehog", |
1902 | "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux", |
1903 | "murfl", "spoo", "breen", "flarn", "octothorpe", |
1904 | "snail", "tiger", "elephant", "octopus", "warthog", "armadillo", |
1905 | "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin", |
1906 | "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper", |
1907 | "wand", "ring", "amulet" |
1908 | #endif |
1909 | }; |
1910 | |
1911 | #define NSTR lenof(strings) |
1912 | |
1913 | void findtest(void) { |
1914 | static const int rels[] = { |
1915 | REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT |
1916 | }; |
1917 | static const char *const relnames[] = { |
1918 | "EQ", "GE", "LE", "LT", "GT" |
1919 | }; |
1920 | int i, j, rel, index; |
1921 | char *p, *ret, *realret, *realret2; |
1922 | int lo, hi, mid, c; |
1923 | |
1924 | for (i = 0; i < (int)NSTR; i++) { |
1925 | p = strings[i]; |
1926 | for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) { |
1927 | rel = rels[j]; |
1928 | |
1929 | lo = 0; hi = arraylen-1; |
1930 | while (lo <= hi) { |
1931 | mid = (lo + hi) / 2; |
1932 | c = strcmp(p, array[mid]); |
1933 | if (c < 0) |
1934 | hi = mid-1; |
1935 | else if (c > 0) |
1936 | lo = mid+1; |
1937 | else |
1938 | break; |
1939 | } |
1940 | |
1941 | if (c == 0) { |
1942 | if (rel == REL234_LT) |
1943 | ret = (mid > 0 ? array[--mid] : NULL); |
1944 | else if (rel == REL234_GT) |
1945 | ret = (mid < arraylen-1 ? array[++mid] : NULL); |
1946 | else |
1947 | ret = array[mid]; |
1948 | } else { |
1949 | assert(lo == hi+1); |
1950 | if (rel == REL234_LT || rel == REL234_LE) { |
1951 | mid = hi; |
1952 | ret = (hi >= 0 ? array[hi] : NULL); |
1953 | } else if (rel == REL234_GT || rel == REL234_GE) { |
1954 | mid = lo; |
1955 | ret = (lo < arraylen ? array[lo] : NULL); |
1956 | } else |
1957 | ret = NULL; |
1958 | } |
1959 | |
1960 | realret = findrelpos234(tree, p, NULL, rel, &index); |
1961 | if (realret != ret) { |
1962 | error("find(\"%s\",%s) gave %s should be %s", |
1963 | p, relnames[j], realret, ret); |
1964 | } |
1965 | if (realret && index != mid) { |
1966 | error("find(\"%s\",%s) gave %d should be %d", |
1967 | p, relnames[j], index, mid); |
1968 | } |
1969 | if (realret && rel == REL234_EQ) { |
1970 | realret2 = index234(tree, index); |
1971 | if (realret2 != realret) { |
1972 | error("find(\"%s\",%s) gave %s(%d) but %d -> %s", |
1973 | p, relnames[j], realret, index, index, realret2); |
1974 | } |
1975 | } |
1976 | #if 0 |
1977 | printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j], |
1978 | realret, index); |
1979 | #endif |
1980 | } |
1981 | } |
1982 | |
1983 | realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index); |
1984 | if (arraylen && (realret != array[0] || index != 0)) { |
1985 | error("find(NULL,GT) gave %s(%d) should be %s(0)", |
1986 | realret, index, array[0]); |
1987 | } else if (!arraylen && (realret != NULL)) { |
1988 | error("find(NULL,GT) gave %s(%d) should be NULL", |
1989 | realret, index); |
1990 | } |
1991 | |
1992 | realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index); |
1993 | if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) { |
1994 | error("find(NULL,LT) gave %s(%d) should be %s(0)", |
1995 | realret, index, array[arraylen-1]); |
1996 | } else if (!arraylen && (realret != NULL)) { |
1997 | error("find(NULL,LT) gave %s(%d) should be NULL", |
1998 | realret, index); |
1999 | } |
2000 | } |
2001 | |
2002 | void splittest(tree234 *tree, void **array, int arraylen) { |
2003 | int i; |
2004 | tree234 *tree3, *tree4; |
2005 | for (i = 0; i <= arraylen; i++) { |
2006 | tree3 = copytree234(tree, NULL, NULL); |
2007 | tree4 = splitpos234(tree3, i, 0); |
2008 | verifytree(tree3, array, i); |
2009 | verifytree(tree4, array+i, arraylen-i); |
2010 | join234(tree3, tree4); |
2011 | freetree234(tree4); /* left empty by join */ |
2012 | verifytree(tree3, array, arraylen); |
2013 | freetree234(tree3); |
2014 | } |
2015 | } |
2016 | |
2017 | int main(void) { |
2018 | int in[NSTR]; |
2019 | int i, j, k; |
2020 | int tworoot, tmplen; |
2021 | unsigned seed = 0; |
2022 | tree234 *tree2, *tree3, *tree4; |
2023 | int c; |
2024 | |
2025 | setvbuf(stdout, NULL, _IOLBF, 0); |
2026 | |
2027 | for (i = 0; i < (int)NSTR; i++) in[i] = 0; |
2028 | array = NULL; |
2029 | arraylen = arraysize = 0; |
2030 | tree = newtree234(mycmp); |
2031 | cmp = mycmp; |
2032 | |
2033 | verify(); |
2034 | for (i = 0; i < 10000; i++) { |
2035 | j = randomnumber(&seed); |
2036 | j %= NSTR; |
2037 | printf("trial: %d\n", i); |
2038 | if (in[j]) { |
2039 | printf("deleting %s (%d)\n", strings[j], j); |
2040 | deltest(strings[j]); |
2041 | in[j] = 0; |
2042 | } else { |
2043 | printf("adding %s (%d)\n", strings[j], j); |
2044 | addtest(strings[j]); |
2045 | in[j] = 1; |
2046 | } |
2047 | disptree(tree); |
2048 | findtest(); |
2049 | } |
2050 | |
2051 | while (arraylen > 0) { |
2052 | j = randomnumber(&seed); |
2053 | j %= arraylen; |
2054 | deltest(array[j]); |
2055 | } |
2056 | |
2057 | freetree234(tree); |
2058 | |
2059 | /* |
2060 | * Now try an unsorted tree. We don't really need to test |
2061 | * delpos234 because we know del234 is based on it, so it's |
2062 | * already been tested in the above sorted-tree code; but for |
2063 | * completeness we'll use it to tear down our unsorted tree |
2064 | * once we've built it. |
2065 | */ |
2066 | tree = newtree234(NULL); |
2067 | cmp = NULL; |
2068 | verify(); |
2069 | for (i = 0; i < 1000; i++) { |
2070 | printf("trial: %d\n", i); |
2071 | j = randomnumber(&seed); |
2072 | j %= NSTR; |
2073 | k = randomnumber(&seed); |
2074 | k %= count234(tree)+1; |
2075 | printf("adding string %s at index %d\n", strings[j], k); |
2076 | addpostest(strings[j], k); |
2077 | } |
2078 | |
2079 | /* |
2080 | * While we have this tree in its full form, we'll take a copy |
2081 | * of it to use in split and join testing. |
2082 | */ |
2083 | tree2 = copytree234(tree, NULL, NULL); |
2084 | verifytree(tree2, array, arraylen);/* check the copy is accurate */ |
2085 | /* |
2086 | * Split tests. Split the tree at every possible point and |
2087 | * check the resulting subtrees. |
2088 | */ |
2089 | tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */ |
2090 | splittest(tree2, array, arraylen); |
2091 | /* |
2092 | * Now do the split test again, but on a tree that has a 2-root |
2093 | * (if the previous one didn't) or doesn't (if the previous one |
2094 | * did). |
2095 | */ |
2096 | tmplen = arraylen; |
2097 | while ((!tree2->root->elems[1]) == tworoot) { |
2098 | delpos234(tree2, --tmplen); |
2099 | } |
2100 | printf("now trying splits on second tree\n"); |
2101 | splittest(tree2, array, tmplen); |
2102 | freetree234(tree2); |
2103 | |
2104 | /* |
2105 | * Back to the main testing of uncounted trees. |
2106 | */ |
2107 | while (count234(tree) > 0) { |
2108 | printf("cleanup: tree size %d\n", count234(tree)); |
2109 | j = randomnumber(&seed); |
2110 | j %= count234(tree); |
2111 | printf("deleting string %s from index %d\n", (char *)array[j], j); |
2112 | delpostest(j); |
2113 | } |
2114 | freetree234(tree); |
2115 | |
2116 | /* |
2117 | * Finally, do some testing on split/join on _sorted_ trees. At |
2118 | * the same time, we'll be testing split on very small trees. |
2119 | */ |
2120 | tree = newtree234(mycmp); |
2121 | cmp = mycmp; |
2122 | arraylen = 0; |
2123 | for (i = 0; i < 16; i++) { |
2124 | addtest(strings[i]); |
2125 | tree2 = copytree234(tree, NULL, NULL); |
2126 | splittest(tree2, array, arraylen); |
2127 | freetree234(tree2); |
2128 | } |
2129 | freetree234(tree); |
2130 | |
2131 | /* |
2132 | * Test silly cases of join: join(emptytree, emptytree), and |
2133 | * also ensure join correctly spots when sorted trees fail the |
2134 | * ordering constraint. |
2135 | */ |
2136 | tree = newtree234(mycmp); |
2137 | tree2 = newtree234(mycmp); |
2138 | tree3 = newtree234(mycmp); |
2139 | tree4 = newtree234(mycmp); |
2140 | assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */ |
2141 | add234(tree2, strings[1]); |
2142 | add234(tree4, strings[0]); |
2143 | array[0] = strings[0]; |
2144 | array[1] = strings[1]; |
2145 | verifytree(tree, array, 0); |
2146 | verifytree(tree2, array+1, 1); |
2147 | verifytree(tree3, array, 0); |
2148 | verifytree(tree4, array, 1); |
2149 | |
2150 | /* |
2151 | * So: |
2152 | * - join(tree,tree3) should leave both tree and tree3 unchanged. |
2153 | * - joinr(tree,tree2) should leave both tree and tree2 unchanged. |
2154 | * - join(tree4,tree3) should leave both tree3 and tree4 unchanged. |
2155 | * - join(tree, tree2) should move the element from tree2 to tree. |
2156 | * - joinr(tree4, tree3) should move the element from tree4 to tree3. |
2157 | * - join(tree,tree3) should return NULL and leave both unchanged. |
2158 | * - join(tree3,tree) should work and create a bigger tree in tree3. |
2159 | */ |
2160 | assert(tree == join234(tree, tree3)); |
2161 | verifytree(tree, array, 0); |
2162 | verifytree(tree3, array, 0); |
2163 | assert(tree2 == join234r(tree, tree2)); |
2164 | verifytree(tree, array, 0); |
2165 | verifytree(tree2, array+1, 1); |
2166 | assert(tree4 == join234(tree4, tree3)); |
2167 | verifytree(tree3, array, 0); |
2168 | verifytree(tree4, array, 1); |
2169 | assert(tree == join234(tree, tree2)); |
2170 | verifytree(tree, array+1, 1); |
2171 | verifytree(tree2, array, 0); |
2172 | assert(tree3 == join234r(tree4, tree3)); |
2173 | verifytree(tree3, array, 1); |
2174 | verifytree(tree4, array, 0); |
2175 | assert(NULL == join234(tree, tree3)); |
2176 | verifytree(tree, array+1, 1); |
2177 | verifytree(tree3, array, 1); |
2178 | assert(tree3 == join234(tree3, tree)); |
2179 | verifytree(tree3, array, 2); |
2180 | verifytree(tree, array, 0); |
2181 | |
2182 | return 0; |
2183 | } |
2184 | |
2185 | #endif |
2186 | |
2187 | #if 0 /* sorted list of strings might be useful */ |
2188 | { |
2189 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", |
2190 | } |
2191 | #endif |