Add a licence.
[sgt/library] / btree.c
1 /*
2 * Flexible B-tree implementation. Supports reference counting for
3 * copy-on-write, user-defined node properties, and variable
4 * degree.
5 *
6 * This file is copyright 2001,2004 Simon Tatham.
7 *
8 * Permission is hereby granted, free of charge, to any person
9 * obtaining a copy of this software and associated documentation
10 * files (the "Software"), to deal in the Software without
11 * restriction, including without limitation the rights to use,
12 * copy, modify, merge, publish, distribute, sublicense, and/or
13 * sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following
15 * conditions:
16 *
17 * The above copyright notice and this permission notice shall be
18 * included in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
25 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * SOFTWARE.
28 */
29
30 /*
31 * TODO:
32 *
33 * Possibly TODO in future, but may not be sensible in this code
34 * architecture:
35 *
36 * - user write properties.
37 * * this all happens during write_unlock(), I think. Except
38 * that we'll now need an _internal_ write_unlock() which
39 * does everything except user write properties. Sigh.
40 * * note that we also need a transform function for elements
41 * (rot13 will certainly require this, and reverse will
42 * require it if the elements themselves are in some way
43 * reversible).
44 *
45 * Still untested:
46 * - searching on user read properties.
47 * - user-supplied copy function.
48 * - bt_add when element already exists.
49 * - bt_del when element doesn't.
50 * - splitpos with before==TRUE.
51 * - split() on sorted elements (but it should be fine).
52 * - bt_replace, at all (it won't be useful until we get user read
53 * properties).
54 * - bt_index_w (won't make much sense until we start using
55 * user-supplied copy fn).
56 */
57
58 #include <stdlib.h>
59 #include <string.h>
60 #include <assert.h>
61
62 #ifdef TEST
63 #include <stdio.h>
64 #include <stdarg.h>
65 #endif
66
67 #include "btree.h"
68
69 #ifdef TEST
70 static void set_invalid_property(void *prop);
71 #endif
72
73 /* ----------------------------------------------------------------------
74 * Type definitions.
75 */
76
77 typedef union nodecomponent nodecomponent;
78 typedef nodecomponent *nodeptr;
79
80 /*
81 * For type-checking purposes, and to ensure I don't accidentally
82 * confuse node_addr with node_ptr during implementation, I'll
83 * define node_addr for the in-memory case as being a struct
84 * containing only a nodeptr.
85 *
86 * This unfortunately needs to go in btree.h so that clients
87 * writing user properties can know about the nodecomponent
88 * structure.
89 */
90 typedef struct {
91 nodeptr p;
92 } node_addr;
93
94 /*
95 * A B-tree node is a horrible thing when you're trying to be
96 * flexible. It is of variable size, and it contains a variety of
97 * distinct types of thing: nodes, elements, some counters, some
98 * user-defined properties ... it's a horrible thing. So we define
99 * it as an array of unions, each union being either an `int' or a
100 * `bt_element_t' or a `node_addr'...
101 */
102
103 union nodecomponent {
104 int i;
105 node_addr na;
106 bt_element_t ep;
107 };
108
109 static const node_addr NODE_ADDR_NULL = { NULL };
110
111 /*
112 * The array of nodecomponents will take the following form:
113 *
114 * - (maxdegree) child pointers.
115 * - (maxdegree-1) element pointers.
116 * - one subtree count (current number of child pointers that are
117 * valid; note that `valid' doesn't imply non-NULL).
118 * - one element count.
119 * - one reference count.
120 */
121
122 struct btree {
123 int mindegree; /* min number of subtrees */
124 int maxdegree; /* max number of subtrees */
125 int depth; /* helps to store this explicitly */
126 node_addr root;
127 cmpfn_t cmp;
128 copyfn_t copy;
129 freefn_t freeelt;
130 int propsize, propalign, propoffset;
131 propmakefn_t propmake;
132 propmergefn_t propmerge;
133 void *userstate; /* passed to all user functions */
134 };
135
136 /* ----------------------------------------------------------------------
137 * Memory management routines and other housekeeping.
138 */
139 #ifdef HAVE_ALLOCA
140 # define ialloc(x) alloca(x)
141 # define ifree(x)
142 #else
143 # define ialloc(x) smalloc(x)
144 # define ifree(x) sfree(x)
145 #endif
146
147 #define new1(t) ( (t *) smalloc(sizeof(t)) )
148 #define newn(t, n) ( (t *) smalloc((n) * sizeof(t)) )
149 #define inew1(t) ( (t *) ialloc(sizeof(t)) )
150 #define inewn(t, n) ( (t *) ialloc((n) * sizeof(t)) )
151
152 static void *smalloc(size_t size)
153 {
154 void *ret = malloc(size);
155 if (!ret)
156 abort();
157 return ret;
158 }
159
160 static void sfree(void *p)
161 {
162 free(p);
163 }
164
165 #ifndef FALSE
166 #define FALSE 0
167 #endif
168 #ifndef TRUE
169 #define TRUE 1
170 #endif
171
172 /* We could probably do with more compiler-specific branches of this #if. */
173 #if defined(__GNUC__)
174 #define INLINE __inline
175 #else
176 #define INLINE
177 #endif
178
179 /* Hooks into the low-level code for test purposes. */
180 #ifdef TEST
181 void testlock(int write, int set, nodeptr n);
182 #else
183 #define testlock(w,s,n)
184 #endif
185
186 /* ----------------------------------------------------------------------
187 * Low-level helper routines, which understand the in-memory format
188 * of a node and know how to read-lock and write-lock.
189 */
190
191 /*
192 * Read and write the node_addr of a child.
193 */
194 static INLINE node_addr bt_child(btree *bt, nodeptr n, int index)
195 {
196 return n[index].na;
197 }
198 static INLINE void bt_set_child(btree *bt, nodeptr n,
199 int index, node_addr value)
200 {
201 n[index].na = value;
202 }
203
204 /*
205 * Read and write the address of an element.
206 */
207 static INLINE bt_element_t bt_element(btree *bt, nodeptr n, int index)
208 {
209 return n[bt->maxdegree + index].ep;
210 }
211 static INLINE void bt_set_element(btree *bt, nodeptr n,
212 int index, bt_element_t value)
213 {
214 n[bt->maxdegree + index].ep = value;
215 }
216
217 /*
218 * Give the number of subtrees currently present in an element.
219 */
220 static INLINE int bt_subtrees(btree *bt, nodeptr n)
221 {
222 return n[bt->maxdegree*2-1].i;
223 }
224 #define bt_elements(bt,n) (bt_subtrees(bt,n) - 1)
225
226 /*
227 * Give the minimum and maximum number of subtrees allowed in a
228 * node.
229 */
230 static INLINE int bt_min_subtrees(btree *bt)
231 {
232 return bt->mindegree;
233 }
234 static INLINE int bt_max_subtrees(btree *bt)
235 {
236 return bt->maxdegree;
237 }
238
239 /*
240 * Return the count of items, and the user properties, in a
241 * particular subtree of a node.
242 *
243 * Note that in the in-memory form of the tree, this breaks the
244 * read-locking semantics, by reading the counts out of the child
245 * nodes without bothering to lock them. We're allowed to do this
246 * because this function is implemented at the same very low level
247 * as the implementation of bt_read_lock(), so we're allowed to
248 * know that read locking actually doesn't do anything.
249 */
250 static INLINE int bt_child_count(btree *bt, nodeptr n, int index)
251 {
252 if (n[index].na.p)
253 return n[index].na.p[bt->maxdegree*2].i;
254 else
255 return 0;
256 }
257
258 static INLINE void *bt_child_prop(btree *bt, nodeptr n, int index)
259 {
260 if (n[index].na.p)
261 return (char *)n[index].na.p + bt->propoffset;
262 else
263 return NULL;
264 }
265
266 /*
267 * Return the count of items in a whole node.
268 */
269 static INLINE int bt_node_count(btree *bt, nodeptr n)
270 {
271 return n[bt->maxdegree*2].i;
272 }
273
274 /*
275 * Determine whether a node is a leaf node or not.
276 */
277 static INLINE int bt_is_leaf(btree *bt, nodeptr n)
278 {
279 return n[0].na.p == NULL;
280 }
281
282 /*
283 * Create a new write-locked node, and return a pointer to it.
284 */
285 static INLINE nodeptr bt_new_node(btree *bt, int nsubtrees)
286 {
287 nodeptr ret = (nodecomponent *)smalloc(bt->propoffset + bt->propsize);
288 ret[bt->maxdegree*2-1].i = nsubtrees;
289 ret[bt->maxdegree*2+1].i = 1; /* reference count 1 */
290 #ifdef TEST
291 set_invalid_property(ret + bt->maxdegree * 2 + 2);
292 #else
293 memset((char *)ret + bt->propoffset, 0, bt->propsize);
294 #endif
295 testlock(TRUE, TRUE, ret);
296 return ret;
297 }
298
299 /*
300 * Destroy a node (must be write-locked).
301 */
302 static INLINE void bt_destroy_node(btree *bt, nodeptr n)
303 {
304 testlock(TRUE, FALSE, n);
305 /* Free the property. */
306 bt->propmerge(bt->userstate, NULL, NULL, n + bt->maxdegree * 2 + 2);
307 sfree(n);
308 }
309
310 /*
311 * Take an existing node and prepare to re-use it in a new context.
312 */
313 static INLINE nodeptr bt_reuse_node(btree *bt, nodeptr n, int nsubtrees)
314 {
315 testlock(TRUE, FALSE, n);
316 testlock(TRUE, TRUE, n);
317 n[bt->maxdegree*2-1].i = nsubtrees;
318 return n;
319 }
320
321 /*
322 * Return an extra reference to a node, for purposes of cloning. So
323 * we have to update its reference count as well.
324 */
325 static INLINE node_addr bt_ref_node(btree *bt, node_addr n)
326 {
327 if (n.p)
328 n.p[bt->maxdegree*2+1].i++;
329 return n;
330 }
331
332 /*
333 * Drop a node's reference count, for purposes of freeing. Returns
334 * the new reference count. Typically this will be tested against
335 * zero to see if the node needs to be physically freed; hence a
336 * NULL node_addr causes a return of 1 (because this isn't
337 * necessary).
338 */
339 static INLINE int bt_unref_node(btree *bt, node_addr n)
340 {
341 if (n.p) {
342 n.p[bt->maxdegree*2+1].i--;
343 return n.p[bt->maxdegree*2+1].i;
344 } else
345 return 1; /* a NULL node is considered OK */
346 }
347
348 /*
349 * Clone a node during write unlocking, if its reference count is
350 * more than one.
351 */
352 static nodeptr bt_clone_node(btree *bt, nodeptr n)
353 {
354 int i;
355 nodeptr ret = (nodecomponent *)smalloc(bt->propoffset + bt->propsize);
356 memcpy(ret, n, (bt->maxdegree*2+1) * sizeof(nodecomponent));
357 if (bt->copy) {
358 for (i = 0; i < bt_elements(bt, ret); i++) {
359 bt_element_t *e = bt_element(bt, ret, i);
360 bt_set_element(bt, ret, i, bt->copy(bt->userstate, e));
361 }
362 }
363 ret[bt->maxdegree*2+1].i = 1; /* clone has reference count 1 */
364 n[bt->maxdegree*2+1].i--; /* drop original's ref count by one */
365 /*
366 * At this low level, we're allowed to reach directly into the
367 * subtrees to fiddle with their reference counts without
368 * having to lock them.
369 */
370 for (i = 0; i < bt_subtrees(bt, ret); i++) {
371 node_addr na = bt_child(bt, ret, i);
372 if (na.p)
373 na.p[bt->maxdegree*2+1].i++; /* inc ref count of each child */
374 }
375 /*
376 * Copy the user property explicitly (in case it contains a
377 * pointer to an allocated area).
378 */
379 memset((char *)ret + bt->propoffset, 0, bt->propsize);
380 bt->propmerge(bt->userstate, NULL, n + bt->maxdegree * 2 + 2,
381 ret + bt->maxdegree * 2 + 2);
382 return ret;
383 }
384
385 /*
386 * Return the node_addr for a currently locked node. NB that this
387 * means node movement must take place during _locking_ rather than
388 * unlocking!
389 */
390 static INLINE node_addr bt_node_addr(btree *bt, nodeptr n)
391 {
392 node_addr ret;
393 ret.p = n;
394 return ret;
395 }
396
397 /*
398 * The bt_write_lock and bt_read_lock functions should gracefully
399 * handle being asked to write-lock a null node pointer, and just
400 * return a null nodeptr.
401 */
402 static INLINE nodeptr bt_write_lock_child(btree *bt, nodeptr a, int index)
403 {
404 node_addr addr = bt_child(bt, a, index);
405 if (addr.p && addr.p[bt->maxdegree*2+1].i > 1) {
406 nodeptr clone = bt_clone_node(bt, addr.p);
407 bt_set_child(bt, a, index, bt_node_addr(bt, clone));
408 testlock(TRUE, TRUE, clone);
409 return clone;
410 }
411 testlock(TRUE, TRUE, addr.p);
412 return addr.p;
413 }
414 static INLINE nodeptr bt_write_lock_root(btree *bt)
415 {
416 node_addr addr = bt->root;
417 if (addr.p && addr.p[bt->maxdegree*2+1].i > 1) {
418 nodeptr clone = bt_clone_node(bt, addr.p);
419 bt->root = bt_node_addr(bt, clone);
420 testlock(TRUE, TRUE, clone);
421 return clone;
422 }
423 testlock(TRUE, TRUE, addr.p);
424 return addr.p;
425 }
426 static INLINE nodeptr bt_read_lock(btree *bt, node_addr a)
427 {
428 testlock(FALSE, TRUE, a.p);
429 return a.p;
430 }
431 #define bt_read_lock_root(bt) (bt_read_lock(bt, (bt)->root))
432 #define bt_read_lock_child(bt,a,index) (bt_read_lock(bt,bt_child(bt,a,index)))
433
434 static INLINE void bt_write_relock(btree *bt, nodeptr n, int props)
435 {
436 int i, ns, count;
437
438 /*
439 * Update the count in the node.
440 */
441 ns = bt_subtrees(bt, n);
442 count = ns-1; /* count the elements */
443 for (i = 0; i < ns; i++)
444 count += bt_child_count(bt, n, i);
445 n[bt->maxdegree*2].i = count;
446 testlock(TRUE, FALSE, n);
447 testlock(TRUE, TRUE, n);
448
449 /*
450 * Update user read properties.
451 */
452 if (props && bt->propsize) {
453 void *prevprop, *eltprop, *thisprop, *childprop;
454
455 prevprop = NULL;
456 eltprop = ialloc(bt->propsize);
457 thisprop = (void *)((char *)n + bt->propoffset);
458
459 for (i = 0; i < ns; i++) {
460 /* Merge a subtree's property into this one.
461 * Initially prevprop==NULL, meaning to just copy. */
462 if ( (childprop = bt_child_prop(bt, n, i)) != NULL ) {
463 bt->propmerge(bt->userstate, prevprop, childprop, thisprop);
464 prevprop = thisprop;
465 }
466
467 if (i < ns-1) {
468 /* Now merge in the separating element. */
469 bt->propmake(bt->userstate, bt_element(bt, n, i), eltprop);
470 bt->propmerge(bt->userstate, prevprop, eltprop, thisprop);
471 prevprop = thisprop;
472 }
473 }
474
475 ifree(eltprop);
476 }
477 }
478
479 static INLINE node_addr bt_write_unlock_internal(btree *bt, nodeptr n,
480 int props)
481 {
482 node_addr ret;
483
484 bt_write_relock(bt, n, props);
485
486 testlock(TRUE, FALSE, n);
487
488 ret.p = n;
489 return ret;
490 }
491
492 static INLINE node_addr bt_write_unlock(btree *bt, nodeptr n)
493 {
494 return bt_write_unlock_internal(bt, n, TRUE);
495 }
496
497 static INLINE void bt_read_unlock(btree *bt, nodeptr n)
498 {
499 /*
500 * For trees in memory, we do nothing here, except run some
501 * optional testing.
502 */
503 testlock(FALSE, FALSE, n);
504 }
505
506 /* ----------------------------------------------------------------------
507 * Higher-level helper functions, which should be independent of
508 * the knowledge of precise node structure in the above code.
509 */
510
511 /*
512 * Return the count of items below a node that appear before the
513 * start of a given subtree.
514 */
515 static int bt_child_startpos(btree *bt, nodeptr n, int index)
516 {
517 int pos = 0;
518
519 while (index > 0) {
520 index--;
521 pos += bt_child_count(bt, n, index) + 1; /* 1 for separating elt */
522 }
523 return pos;
524 }
525
526 /*
527 * Create a new root node for a tree.
528 */
529 static void bt_new_root(btree *bt, node_addr left, node_addr right,
530 bt_element_t element)
531 {
532 nodeptr n;
533 n = bt_new_node(bt, 2);
534 bt_set_child(bt, n, 0, left);
535 bt_set_child(bt, n, 1, right);
536 bt_set_element(bt, n, 0, element);
537 bt->root = bt_write_unlock(bt, n);
538 bt->depth++;
539 }
540
541 /*
542 * Discard the root node of a tree, and enshrine a new node as the
543 * root. Expects to be passed a write-locked nodeptr to the old
544 * root.
545 */
546 static void bt_shift_root(btree *bt, nodeptr n, node_addr na)
547 {
548 bt_destroy_node(bt, n);
549 bt->root = na;
550 bt->depth--;
551 }
552
553 /*
554 * Given a numeric index within a node, find which subtree we would
555 * descend to in order to find that index.
556 *
557 * Updates `pos' to give the numeric index within the subtree
558 * found. Also returns `ends' (if non-NULL), which has bit 0 set if
559 * the index is at the very left edge of the subtree, and/or bit 1
560 * if it's at the very right edge.
561 *
562 * Return value is the number of the subtree (0 upwards).
563 */
564 #define ENDS_NONE 0
565 #define ENDS_LEFT 1
566 #define ENDS_RIGHT 2
567 #define ENDS_BOTH 3
568 static int bt_lookup_pos(btree *bt, nodeptr n, int *pos, int *ends)
569 {
570 int child = 0;
571 int nchildren = bt_subtrees(bt, n);
572
573 while (child < nchildren) {
574 int count = bt_child_count(bt, n, child);
575 if (*pos <= count) {
576 if (ends) {
577 *ends = 0;
578 if (*pos == count)
579 *ends |= ENDS_RIGHT;
580 if (*pos == 0)
581 *ends |= ENDS_LEFT;
582 }
583 return child;
584 }
585 *pos -= count + 1; /* 1 for the separating element */
586 child++;
587 }
588 return -1; /* ran off the end; shouldn't happen */
589 }
590
591 /*
592 * Given an element to search for within a node, find either the
593 * element, or which subtree we would descend to to continue
594 * searching for that element.
595 *
596 * Return value is either the index of the element, or the index of
597 * the subtree (both 0 upwards). `is_elt' returns FALSE or TRUE
598 * respectively.
599 *
600 * Since this may be used by bt_find() with an alternative cmpfn_t,
601 * we always pass the input element as the first argument to cmp.
602 */
603 static int bt_lookup_cmp(btree *bt, nodeptr n, bt_element_t element,
604 cmpfn_t cmp, int *is_elt)
605 {
606 int mintree = 0, maxtree = bt_subtrees(bt, n)-1;
607
608 while (mintree < maxtree) {
609 int elt = (maxtree + mintree) / 2;
610 int c = cmp(bt->userstate, element, bt_element(bt, n, elt));
611
612 if (c == 0) {
613 *is_elt = TRUE;
614 return elt;
615 } else if (c < 0) {
616 /*
617 * `element' is less than element `elt'. So it can be
618 * in subtree number `elt' at the highest.
619 */
620 maxtree = elt;
621 } else { /* c > 0 */
622 /*
623 * `element' is greater than element `elt'. So it can
624 * be in subtree number (elt+1) at the lowest.
625 */
626 mintree = elt+1;
627 }
628 }
629
630 /*
631 * If we reach here without returning, we must have narrowed
632 * our search to the point where mintree = maxtree. So the
633 * element is not in the node itself and we know which subtree
634 * to search next.
635 */
636 assert(mintree == maxtree);
637 *is_elt = FALSE;
638 return mintree;
639 }
640
641 /*
642 * Generic transformations on B-tree nodes.
643 *
644 * This function divides essentially into an input side and an
645 * output side. The input side accumulates a list of items
646 * node,element,node,element,...,element,node; the output side
647 * writes those items into either one or two nodes.
648 *
649 * `intype' can be:
650 *
651 * - NODE_AS_IS. The input list is the contents of in1, followed
652 * by inelt, followed by the contents of in2. The `extra'
653 * parameters are unused, as is `inaux'.
654 *
655 * - NODE_ADD_ELT. `in2' is unused. The input list is the contents
656 * of `in1', but with subtree pointer number `inaux' replaced by
657 * extra1/inelt/extra2.
658 *
659 * - NODE_DEL_ELT. `in2' and `inelt' are unused, as is `extra2'.
660 * The input list is the contents of `in1', but with element
661 * pointer number `inaux' and its surrounding two subtrees
662 * replaced by extra1.
663 *
664 * Having obtained the input list, it is then written to one or two
665 * output nodes. If `splitpos' is NODE_JOIN, everything is written
666 * into one output node `out1'. Otherwise, `splitpos' is treated as
667 * an element index within the input list; that element is returned
668 * in `outelt', and the contents of the list is divided there and
669 * returned in nodes `out1' and `out2'.
670 *
671 * This function will re-use nodes in the `obvious' order. If two
672 * nodes are passed in and two nodes are output, they'll be the
673 * same nodes; if one node is passed in and one node output, it
674 * will be the same node too. If two are passed in and only one
675 * output, the first one will be used and the second destroyed; if
676 * one node is passed in and two are output, the one passed in will
677 * be the first of those returned, and the second will be new.
678 */
679 #define NODE_AS_IS 1
680 #define NODE_ADD_ELT 2
681 #define NODE_DEL_ELT 3
682 #define NODE_JOIN -1
683 static void bt_xform(btree *bt, int intype, int inaux,
684 nodeptr in1, nodeptr in2, bt_element_t inelt,
685 node_addr extra1, node_addr extra2,
686 int splitpos, nodeptr *out1, nodeptr *out2,
687 bt_element_t *outelt)
688 {
689 node_addr *nodes;
690 bt_element_t *elements;
691 nodeptr ret1, ret2;
692 int n1, n2, off2, i, j;
693
694 nodes = inewn(node_addr, 2 * bt_max_subtrees(bt));
695 elements = inewn(bt_element_t, 2 * bt_max_subtrees(bt));
696
697 /*
698 * Accumulate the input list.
699 */
700 switch(intype) {
701 case NODE_AS_IS:
702 n1 = bt_subtrees(bt, in1);
703 n2 = bt_subtrees(bt, in2);
704 off2 = 0;
705 break;
706 case NODE_ADD_ELT:
707 in2 = in1;
708 n1 = inaux+1;
709 n2 = bt_subtrees(bt, in1) - inaux;
710 off2 = inaux;
711 break;
712 case NODE_DEL_ELT:
713 in2 = in1;
714 n1 = inaux+1;
715 n2 = bt_subtrees(bt, in1) - inaux - 1;
716 off2 = inaux+1;
717 break;
718 }
719 i = j = 0;
720 while (j < n1) {
721 nodes[i] = bt_child(bt, in1, j);
722 if (j+1 < n1)
723 elements[i] = bt_element(bt, in1, j);
724 i++, j++;
725 }
726 if (intype == NODE_DEL_ELT) {
727 i--;
728 }
729 j = 0;
730 while (j < n2) {
731 nodes[i] = bt_child(bt, in2, off2+j);
732 if (j+1 < n2)
733 elements[i] = bt_element(bt, in2, off2+j);
734 i++, j++;
735 }
736 switch (intype) {
737 case NODE_AS_IS:
738 elements[n1-1] = inelt;
739 break;
740 case NODE_ADD_ELT:
741 nodes[n1-1] = extra1;
742 nodes[n1] = extra2;
743 elements[n1-1] = inelt;
744 break;
745 case NODE_DEL_ELT:
746 nodes[n1-1] = extra1;
747 break;
748 }
749
750 /*
751 * Now determine how many subtrees go in each output node, and
752 * actually create the nodes to be returned.
753 */
754 if (splitpos != NODE_JOIN) {
755 n1 = splitpos+1, n2 = i - splitpos - 1;
756 if (outelt)
757 *outelt = elements[splitpos];
758 } else {
759 n1 = i, n2 = 0;
760 }
761
762 ret1 = bt_reuse_node(bt, in1, n1);
763 if (intype == NODE_AS_IS && in2) {
764 /* We have a second input node. */
765 if (n2)
766 ret2 = bt_reuse_node(bt, in2, n2);
767 else
768 bt_destroy_node(bt, in2);
769 } else {
770 /* We have no second input node. */
771 if (n2)
772 ret2 = bt_new_node(bt, n2);
773 else
774 ret2 = NULL;
775 }
776
777 if (out1) *out1 = ret1;
778 if (out2) *out2 = ret2;
779
780 for (i = 0; i < n1; i++) {
781 bt_set_child(bt, ret1, i, nodes[i]);
782 if (i+1 < n1)
783 bt_set_element(bt, ret1, i, elements[i]);
784 }
785 if (n2) {
786 if (outelt) *outelt = elements[n1-1];
787 for (i = 0; i < n2; i++) {
788 bt_set_child(bt, ret2, i, nodes[n1+i]);
789 if (i+1 < n2)
790 bt_set_element(bt, ret2, i, elements[n1+i]);
791 }
792 }
793
794 ifree(nodes);
795 ifree(elements);
796 }
797
798 /*
799 * Fiddly little compare functions for use in special cases of
800 * findrelpos. One always returns +1 (a > b), the other always
801 * returns -1 (a < b).
802 */
803 static int bt_cmp_greater(void *state,
804 const bt_element_t a, const bt_element_t b)
805 {
806 return +1;
807 }
808 static int bt_cmp_less(void *state,
809 const bt_element_t a, const bt_element_t b)
810 {
811 return -1;
812 }
813
814 /* ----------------------------------------------------------------------
815 * User-visible administration routines.
816 */
817
818 btree *bt_new(cmpfn_t cmp, copyfn_t copy, freefn_t freeelt,
819 int propsize, int propalign, propmakefn_t propmake,
820 propmergefn_t propmerge, void *state, int mindegree)
821 {
822 btree *ret;
823
824 ret = new1(btree);
825 ret->mindegree = mindegree;
826 ret->maxdegree = 2*mindegree;
827 ret->depth = 0; /* not even a root right now */
828 ret->root = NODE_ADDR_NULL;
829 ret->cmp = cmp;
830 ret->copy = copy;
831 ret->freeelt = freeelt;
832 ret->propsize = propsize;
833 ret->propalign = propalign;
834 ret->propoffset = sizeof(nodecomponent) * (ret->maxdegree*2 + 2);
835 if (propalign > 0) {
836 ret->propoffset += propalign - 1;
837 ret->propoffset -= ret->propoffset % propalign;
838 }
839 ret->propmake = propmake;
840 ret->propmerge = propmerge;
841 ret->userstate = state;
842
843 return ret;
844 }
845
846 static void bt_free_node(btree *bt, nodeptr n)
847 {
848 int i;
849
850 for (i = 0; i < bt_subtrees(bt, n); i++) {
851 node_addr na;
852 nodeptr n2;
853
854 na = bt_child(bt, n, i);
855 if (!bt_unref_node(bt, na)) {
856 n2 = bt_write_lock_child(bt, n, i);
857 bt_free_node(bt, n2);
858 }
859 }
860
861 if (bt->freeelt) {
862 for (i = 0; i < bt_subtrees(bt, n)-1; i++)
863 bt->freeelt(bt->userstate, bt_element(bt, n, i));
864 }
865
866 bt_destroy_node(bt, n);
867 }
868
869 void bt_free(btree *bt)
870 {
871 nodeptr n;
872
873 if (!bt_unref_node(bt, bt->root)) {
874 n = bt_write_lock_root(bt);
875 bt_free_node(bt, n);
876 }
877
878 sfree(bt);
879 }
880
881 btree *bt_clone(btree *bt)
882 {
883 btree *bt2;
884
885 bt2 = bt_new(bt->cmp, bt->copy, bt->freeelt, bt->propsize, bt->propalign,
886 bt->propmake, bt->propmerge, bt->userstate, bt->mindegree);
887 bt2->depth = bt->depth;
888 bt2->root = bt_ref_node(bt, bt->root);
889 return bt2;
890 }
891
892 /*
893 * Nice simple function to count the size of a tree.
894 */
895 int bt_count(btree *bt)
896 {
897 int count;
898 nodeptr n;
899
900 n = bt_read_lock_root(bt);
901 if (n) {
902 count = bt_node_count(bt, n);
903 bt_read_unlock(bt, n);
904 return count;
905 } else {
906 return 0;
907 }
908 }
909
910 /* ----------------------------------------------------------------------
911 * Actual B-tree algorithms.
912 */
913
914 /*
915 * Find an element by numeric index. bt_index_w is the same, but
916 * works with write locks instead of read locks, so it guarantees
917 * to return an element with only one reference to it. (You'd use
918 * this if you were using tree cloning, and wanted to modify the
919 * element once you'd found it.)
920 */
921 bt_element_t bt_index(btree *bt, int index)
922 {
923 nodeptr n, n2;
924 int child, ends;
925
926 n = bt_read_lock_root(bt);
927
928 if (index < 0 || index >= bt_node_count(bt, n)) {
929 bt_read_unlock(bt, n);
930 return NULL;
931 }
932
933 while (1) {
934 child = bt_lookup_pos(bt, n, &index, &ends);
935 if (ends & ENDS_RIGHT) {
936 bt_element_t ret = bt_element(bt, n, child);
937 bt_read_unlock(bt, n);
938 return ret;
939 }
940 n2 = bt_read_lock_child(bt, n, child);
941 bt_read_unlock(bt, n);
942 n = n2;
943 assert(n != NULL);
944 }
945 }
946
947 bt_element_t bt_index_w(btree *bt, int index)
948 {
949 nodeptr n, n2;
950 int nnodes, child, ends;
951 nodeptr *nodes;
952 bt_element_t ret;
953
954 nodes = inewn(nodeptr, bt->depth+1);
955 nnodes = 0;
956
957 n = bt_write_lock_root(bt);
958
959 if (index < 0 || index >= bt_node_count(bt, n)) {
960 bt_write_unlock(bt, n);
961 return NULL;
962 }
963
964 while (1) {
965 nodes[nnodes++] = n;
966 child = bt_lookup_pos(bt, n, &index, &ends);
967 if (ends & ENDS_RIGHT) {
968 ret = bt_element(bt, n, child);
969 break;
970 }
971 n2 = bt_write_lock_child(bt, n, child);
972 n = n2;
973 assert(n != NULL);
974 }
975
976 while (nnodes-- > 0)
977 bt_write_unlock(bt, nodes[nnodes]);
978
979 return ret;
980 }
981
982 /*
983 * Search for an element by sorted order.
984 */
985 bt_element_t bt_findrelpos(btree *bt, bt_element_t element, cmpfn_t cmp,
986 int relation, int *index)
987 {
988 nodeptr n, n2;
989 int child, is_elt;
990 bt_element_t gotit;
991 int pos = 0;
992 int count;
993
994 if (!cmp) cmp = bt->cmp;
995
996 /*
997 * Special case: relation LT/GT and element NULL means get an
998 * extreme element of the tree. We do this by fudging the
999 * compare function so that our NULL element will be considered
1000 * infinitely large or infinitely small.
1001 */
1002 if (element == NULL) {
1003 assert(relation == BT_REL_LT || relation == BT_REL_GT);
1004 if (relation == BT_REL_LT)
1005 cmp = bt_cmp_greater; /* always returns a > b */
1006 else
1007 cmp = bt_cmp_less; /* always returns a < b */
1008 }
1009
1010 gotit = NULL;
1011 n = bt_read_lock_root(bt);
1012 if (!n)
1013 return NULL;
1014 count = bt_node_count(bt, n);
1015 while (n) {
1016 child = bt_lookup_cmp(bt, n, element, cmp, &is_elt);
1017 if (is_elt) {
1018 pos += bt_child_startpos(bt, n, child+1) - 1;
1019 gotit = bt_element(bt, n, child);
1020 bt_read_unlock(bt, n);
1021 break;
1022 } else {
1023 pos += bt_child_startpos(bt, n, child);
1024 n2 = bt_read_lock_child(bt, n, child);
1025 bt_read_unlock(bt, n);
1026 n = n2;
1027 }
1028 }
1029
1030 /*
1031 * Now all nodes are unlocked, and we are _either_ (a) holding
1032 * an element in `gotit' whose index we have in `pos', _or_ (b)
1033 * holding nothing in `gotit' but we know the index of the
1034 * next-higher element.
1035 */
1036 if (gotit) {
1037 /*
1038 * We have the real element. For EQ, LE and GE relations we
1039 * can now just return it; otherwise we must return the
1040 * next element down or up.
1041 */
1042 if (relation == BT_REL_LT)
1043 gotit = bt_index(bt, --pos);
1044 else if (relation == BT_REL_GT)
1045 gotit = bt_index(bt, ++pos);
1046 } else {
1047 /*
1048 * We don't have the real element. For EQ relation we now
1049 * just give up; for everything else we return the next
1050 * element down or up.
1051 */
1052 if (relation == BT_REL_LT || relation == BT_REL_LE)
1053 gotit = bt_index(bt, --pos);
1054 else if (relation == BT_REL_GT || relation == BT_REL_GE)
1055 gotit = bt_index(bt, pos);
1056 }
1057
1058 if (gotit && index) *index = pos;
1059 return gotit;
1060 }
1061 bt_element_t bt_findrel(btree *bt, bt_element_t element, cmpfn_t cmp,
1062 int relation)
1063 {
1064 return bt_findrelpos(bt, element, cmp, relation, NULL);
1065 }
1066 bt_element_t bt_findpos(btree *bt, bt_element_t element, cmpfn_t cmp,
1067 int *index)
1068 {
1069 return bt_findrelpos(bt, element, cmp, BT_REL_EQ, index);
1070 }
1071 bt_element_t bt_find(btree *bt, bt_element_t element, cmpfn_t cmp)
1072 {
1073 return bt_findrelpos(bt, element, cmp, BT_REL_EQ, NULL);
1074 }
1075
1076 /*
1077 * Find an element by property-based search. Returns the element
1078 * (if one is selected - the search can also terminate by
1079 * descending to a nonexistent subtree of a leaf node, equivalent
1080 * to selecting the _gap_ between two elements); also returns the
1081 * index of either the element or the gap in `*index' if `index' is
1082 * non-NULL.
1083 */
1084 bt_element_t bt_propfind(btree *bt, searchfn_t search, void *sstate,
1085 int *index)
1086 {
1087 nodeptr n, n2;
1088 int i, j, count, is_elt;
1089 void **props;
1090 int *counts;
1091 bt_element_t *elts;
1092 bt_element_t *e = NULL;
1093
1094 props = inewn(void *, bt->maxdegree);
1095 counts = inewn(int, bt->maxdegree);
1096 elts = inewn(bt_element_t, bt->maxdegree);
1097
1098 n = bt_read_lock_root(bt);
1099
1100 count = 0;
1101
1102 while (n) {
1103 int ntrees = bt_subtrees(bt, n);
1104
1105 /*
1106 * Prepare the arguments to the search function.
1107 */
1108 for (i = 0; i < ntrees; i++) {
1109 props[i] = bt_child_prop(bt, n, i);
1110 counts[i] = bt_child_count(bt, n, i);
1111 if (i < ntrees-1)
1112 elts[i] = bt_element(bt, n, i);
1113 }
1114
1115 /*
1116 * Call the search function.
1117 */
1118 i = search(bt->userstate, sstate, ntrees,
1119 props, counts, elts, &is_elt);
1120
1121 if (!is_elt) {
1122 /*
1123 * Descend to subtree i. Update `count' to consider
1124 * everything (both subtrees and elements) before that
1125 * subtree.
1126 */
1127 for (j = 0; j < i; j++)
1128 count += 1 + bt_child_count(bt, n, j);
1129 n2 = bt_read_lock_child(bt, n, i);
1130 bt_read_unlock(bt, n);
1131 n = n2;
1132 } else {
1133 /*
1134 * Return element i. Update `count' to consider
1135 * everything (both subtrees and elements) before that
1136 * element.
1137 */
1138 for (j = 0; j <= i; j++)
1139 count += 1 + bt_child_count(bt, n, j);
1140 count--; /* don't count element i itself */
1141 e = bt_element(bt, n, i);
1142 bt_read_unlock(bt, n);
1143 break;
1144 }
1145 }
1146
1147 ifree(props);
1148 ifree(counts);
1149 ifree(elts);
1150
1151 if (index) *index = count;
1152 return e;
1153 }
1154
1155 /*
1156 * Replace the element at a numeric index by a new element. Returns
1157 * the old element.
1158 *
1159 * Can also be used when the new element is the _same_ as the old
1160 * element, but has changed in some way that will affect user
1161 * properties.
1162 */
1163 bt_element_t bt_replace(btree *bt, bt_element_t element, int index)
1164 {
1165 nodeptr n;
1166 nodeptr *nodes;
1167 bt_element_t ret;
1168 int nnodes, child, ends;
1169
1170 nodes = inewn(nodeptr, bt->depth+1);
1171 nnodes = 0;
1172
1173 n = bt_write_lock_root(bt);
1174
1175 if (index < 0 || index >= bt_node_count(bt, n)) {
1176 bt_write_unlock(bt, n);
1177 return NULL;
1178 }
1179
1180 while (1) {
1181 nodes[nnodes++] = n;
1182 child = bt_lookup_pos(bt, n, &index, &ends);
1183 if (ends & ENDS_RIGHT) {
1184 ret = bt_element(bt, n, child);
1185 bt_set_element(bt, n, child, element);
1186 break;
1187 }
1188 n = bt_write_lock_child(bt, n, child);
1189 assert(n != NULL);
1190 }
1191
1192 while (nnodes-- > 0)
1193 bt_write_unlock(bt, nodes[nnodes]);
1194
1195 return ret;
1196 }
1197
1198 /*
1199 * Add at a specific position. As we search down the tree we must
1200 * write-lock every node we meet, since otherwise we might fail to
1201 * clone nodes that will end up pointing to different things.
1202 */
1203 void bt_addpos(btree *bt, bt_element_t element, int pos)
1204 {
1205 nodeptr n;
1206 node_addr left, right, single;
1207 nodeptr *nodes;
1208 int *childposns;
1209 int nnodes, child;
1210
1211 /*
1212 * Since in a reference-counted tree we can't have parent
1213 * links, we will have to use O(depth) space to store the list
1214 * of nodeptrs we have gone through, so we can un-write-lock
1215 * them when we've finished. We also store the subtree index we
1216 * descended to at each stage.
1217 */
1218 nodes = inewn(nodeptr, bt->depth+1);
1219 childposns = inewn(int, bt->depth+1);
1220 nnodes = 0;
1221
1222 n = bt_write_lock_root(bt);
1223
1224 assert(pos >= 0 && pos <= (n ? bt_node_count(bt, n) : 0));
1225
1226 /*
1227 * Scan down the tree, write-locking nodes, until we find the
1228 * empty subtree where we want to insert the item.
1229 */
1230 while (n) {
1231 nodes[nnodes] = n;
1232 child = bt_lookup_pos(bt, n, &pos, NULL);
1233 childposns[nnodes] = child;
1234 nnodes++;
1235 n = bt_write_lock_child(bt, n, child);
1236 }
1237
1238 left = right = NODE_ADDR_NULL;
1239
1240 /*
1241 * Now nodes[nnodes-1] wants to have subtree index
1242 * childposns[nnodes-1] replaced by the node/element/node triple
1243 * (left,element,right). Propagate this up the tree until we
1244 * can stop.
1245 */
1246 while (nnodes-- > 0) {
1247 n = nodes[nnodes];
1248 if (bt_subtrees(bt, n) == bt_max_subtrees(bt)) {
1249 nodeptr lptr, rptr;
1250 /* Split the node and carry on up. */
1251 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1252 n, NULL, element, left, right,
1253 bt_min_subtrees(bt), &lptr, &rptr, &element);
1254 left = bt_write_unlock(bt, lptr);
1255 right = bt_write_unlock(bt, rptr);
1256 } else {
1257 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1258 n, NULL, element, left, right,
1259 NODE_JOIN, &n, NULL, NULL);
1260 single = bt_write_unlock(bt, n);
1261 break;
1262 }
1263 }
1264
1265 /*
1266 * If nnodes < 0, we have just split the root and we need to
1267 * build a new root node.
1268 */
1269 if (nnodes < 0) {
1270 bt_new_root(bt, left, right, element);
1271 } else {
1272 /*
1273 * Now nodes[nnodes-1] just wants to have child pointer
1274 * child[nnodes-1] replaced by `single', in case the
1275 * subtree was moved. Propagate this back up to the root,
1276 * unlocking all nodes.
1277 */
1278 while (nnodes-- > 0) {
1279 bt_set_child(bt, nodes[nnodes], childposns[nnodes], single);
1280 single = bt_write_unlock(bt, nodes[nnodes]);
1281 }
1282 }
1283
1284 ifree(nodes);
1285 ifree(childposns);
1286 }
1287
1288 /*
1289 * Add an element in sorted order. This is a wrapper on bt_addpos()
1290 * which finds the numeric index to add the item at and then calls
1291 * addpos. This isn't an optimal use of time, but it saves space by
1292 * avoiding starting to clone multiply-linked nodes until it's
1293 * known that the item _can_ be added to the tree (and isn't
1294 * duplicated in it already).
1295 */
1296 bt_element_t bt_add(btree *bt, bt_element_t element)
1297 {
1298 nodeptr n, n2;
1299 int child, is_elt;
1300 int pos = 0;
1301
1302 n = bt_read_lock_root(bt);
1303 while (n) {
1304 child = bt_lookup_cmp(bt, n, element, bt->cmp, &is_elt);
1305 if (is_elt) {
1306 bt_read_unlock(bt, n);
1307 return bt_element(bt, n, child); /* element exists already */
1308 } else {
1309 pos += bt_child_startpos(bt, n, child);
1310 n2 = bt_read_lock_child(bt, n, child);
1311 bt_read_unlock(bt, n);
1312 n = n2;
1313 }
1314 }
1315 bt_addpos(bt, element, pos);
1316 return element;
1317 }
1318
1319 /*
1320 * Delete an element given its numeric position. Returns the
1321 * element deleted.
1322 */
1323 bt_element_t bt_delpos(btree *bt, int pos)
1324 {
1325 nodeptr n, c, c2, saved_n;
1326 nodeptr *nodes;
1327 int nnodes, child, nroot, pos2, ends, st, splitpoint, saved_pos;
1328 bt_element_t e, ret;
1329
1330 /*
1331 * Just like in bt_add, we store the set of nodeptrs we
1332 * write-locked on the way down, so we can unlock them on the
1333 * way back up.
1334 */
1335 nodes = inewn(nodeptr, bt->depth+1);
1336 nnodes = 0;
1337
1338 n = bt_write_lock_root(bt);
1339 nroot = TRUE;
1340 saved_n = NULL;
1341
1342 if (!n || pos < 0 || pos >= bt_node_count(bt, n)) {
1343 if (n)
1344 bt_write_unlock(bt, n);
1345 return NULL;
1346 }
1347
1348 while (1) {
1349 nodes[nnodes++] = n;
1350
1351 /*
1352 * Find out which subtree to descend to.
1353 */
1354 pos2 = pos;
1355 child = bt_lookup_pos(bt, n, &pos, &ends);
1356 c = bt_write_lock_child(bt, n, child);
1357 if (c && bt_subtrees(bt, c) == bt_min_subtrees(bt)) {
1358 /*
1359 * We're trying to descend to a subtree that's of
1360 * minimum size. Do something!
1361 */
1362 if (child > 0) {
1363 /*
1364 * Either move a subtree from the left sibling, or
1365 * merge with it. (Traditionally we would only
1366 * merge if we can't move a subtree from _either_
1367 * sibling, but this way avoids too many extra
1368 * write locks.)
1369 */
1370 c2 = c;
1371 c = bt_write_lock_child(bt, n, child-1);
1372 e = bt_element(bt, n, child-1);
1373 st = bt_subtrees(bt, c);
1374 if (st > bt_min_subtrees(bt))
1375 splitpoint = st - 2;
1376 else
1377 splitpoint = NODE_JOIN;
1378 child--;
1379 } else {
1380 /*
1381 * Likewise on the right-hand side.
1382 */
1383 c2 = bt_write_lock_child(bt, n, child+1);
1384 e = bt_element(bt, n, child);
1385 st = bt_subtrees(bt, c2);
1386 if (st > bt_min_subtrees(bt))
1387 splitpoint = bt_min_subtrees(bt);
1388 else
1389 splitpoint = NODE_JOIN;
1390 }
1391
1392 if (splitpoint == NODE_JOIN) {
1393 /*
1394 * So if we're merging nodes, go to it...
1395 */
1396 bt_xform(bt, NODE_AS_IS, 0,
1397 c, c2, e, NODE_ADDR_NULL, NODE_ADDR_NULL,
1398 NODE_JOIN, &c, NULL, NULL);
1399 bt_xform(bt, NODE_DEL_ELT, child,
1400 n, NULL, NULL, bt_node_addr(bt, c), NODE_ADDR_NULL,
1401 NODE_JOIN, &n, NULL, NULL);
1402 if (nroot && bt_subtrees(bt, n) == 1) {
1403 /*
1404 * Whoops, we just merged the last two children
1405 * of the root. Better relocate the root.
1406 */
1407 bt_shift_root(bt, n, bt_node_addr(bt, c));
1408 nnodes--; /* don't leave it in nodes[]! */
1409 n = NULL;
1410 bt_write_relock(bt, c, TRUE);
1411 } else
1412 bt_write_unlock(bt, c);
1413 } else {
1414 /*
1415 * Or if we're redistributing subtrees, go to that.
1416 */
1417 bt_xform(bt, NODE_AS_IS, 0,
1418 c, c2, e, NODE_ADDR_NULL, NODE_ADDR_NULL,
1419 splitpoint, &c, &c2, &e);
1420 bt_set_element(bt, n, child, e);
1421 bt_write_unlock(bt, c);
1422 bt_write_unlock(bt, c2);
1423 }
1424
1425 if (n) {
1426 /* Recompute the counts in n so we can do lookups again. */
1427 bt_write_relock(bt, n, TRUE);
1428
1429 /* Having done the transform, redo the position lookup. */
1430 pos = pos2;
1431 child = bt_lookup_pos(bt, n, &pos, &ends);
1432 c = bt_write_lock_child(bt, n, child);
1433 } else {
1434 pos = pos2;
1435 }
1436 }
1437
1438 /*
1439 * Now see if this node contains the element we're
1440 * looking for.
1441 */
1442 if (n && (ends & ENDS_RIGHT)) {
1443 /*
1444 * It does. Element number `child' is the element we
1445 * want to delete. See if this is a leaf node...
1446 */
1447 if (!bt_is_leaf(bt, n)) {
1448 /*
1449 * It's not a leaf node. So we save the nodeptr and
1450 * element index for later reference, and decrement
1451 * `pos' so that we're searching for the element to its
1452 * left, which _will_ be in a leaf node.
1453 */
1454 saved_n = n;
1455 saved_pos = child;
1456 pos--;
1457 } else {
1458 /*
1459 * We've reached a leaf node. Check to see if an
1460 * internal-node position was stored in saved_n and
1461 * saved_pos, and move this element there if so.
1462 */
1463 if (saved_n) {
1464 ret = bt_element(bt, saved_n, saved_pos);
1465 bt_set_element(bt, saved_n, saved_pos,
1466 bt_element(bt, n, child));
1467 } else {
1468 ret = bt_element(bt, n, child);
1469 }
1470 /* Then delete it from the leaf node. */
1471 bt_xform(bt, NODE_DEL_ELT, child,
1472 n, NULL, NULL, NODE_ADDR_NULL, NODE_ADDR_NULL,
1473 NODE_JOIN, &n, NULL, NULL);
1474 /*
1475 * Final special case: if this is the root node and
1476 * we've just deleted its last element, we should
1477 * destroy it and leave a completely empty tree.
1478 */
1479 if (nroot && bt_subtrees(bt, n) == 1) {
1480 bt_shift_root(bt, n, NODE_ADDR_NULL);
1481 nnodes--; /* and take it out of nodes[] */
1482 }
1483 /* Now we're done */
1484 break;
1485 }
1486 }
1487
1488 /* Descend to the child and go round again. */
1489 n = c;
1490 nroot = FALSE;
1491 }
1492
1493 /*
1494 * All done. Zip back up the tree un-write-locking nodes.
1495 */
1496 while (nnodes-- > 0)
1497 bt_write_unlock(bt, nodes[nnodes]);
1498
1499 ifree(nodes);
1500
1501 return ret;
1502 }
1503
1504 /*
1505 * Delete an element in sorted order.
1506 */
1507 bt_element_t bt_del(btree *bt, bt_element_t element)
1508 {
1509 int index;
1510 if (!bt_findrelpos(bt, element, NULL, BT_REL_EQ, &index))
1511 return NULL; /* wasn't there */
1512 return bt_delpos(bt, index);
1513 }
1514
1515 /*
1516 * Join two trees together, given their respective depths and a
1517 * middle element. Puts the resulting tree in the root of `bt'.
1518 *
1519 * This internal routine assumes that the trees have the same
1520 * degree.
1521 *
1522 * The input nodeptrs are assumed to be write-locked, but none of
1523 * their children are yet write-locked.
1524 */
1525 static void bt_join_internal(btree *bt, nodeptr lp, nodeptr rp,
1526 bt_element_t sep, int ld, int rd)
1527 {
1528 nodeptr *nodes;
1529 int *childposns;
1530 int nnodes, nodessize;
1531 int lsub, rsub;
1532
1533 /*
1534 * We will need to store parent nodes up to the difference
1535 * between ld and rd.
1536 */
1537 nodessize = (ld < rd ? rd-ld : ld-rd);
1538 if (nodessize) { /* we may not need _any_! */
1539 nodes = inewn(nodeptr, nodessize);
1540 childposns = inewn(int, nodessize);
1541 }
1542 nnodes = 0;
1543
1544 if (ld > rd) {
1545 bt->root = bt_node_addr(bt, lp);
1546 bt->depth = ld;
1547 /* If the left tree is taller, search down its right-hand edge. */
1548 while (ld > rd) {
1549 int child = bt_subtrees(bt, lp) - 1;
1550 nodeptr n = bt_write_lock_child(bt, lp, child);
1551 nodes[nnodes] = lp;
1552 childposns[nnodes] = child;
1553 nnodes++;
1554 lp = n;
1555 ld--;
1556 }
1557 } else {
1558 bt->root = bt_node_addr(bt, rp);
1559 bt->depth = rd;
1560 /* If the right tree is taller, search down its left-hand edge. */
1561 while (rd > ld) {
1562 nodeptr n = bt_write_lock_child(bt, rp, 0);
1563 nodes[nnodes] = rp;
1564 childposns[nnodes] = 0;
1565 nnodes++;
1566 rp = n;
1567 rd--;
1568 }
1569 }
1570
1571 /*
1572 * So we now want to combine nodes lp and rp into either one or
1573 * two plausibly-sized nodes, whichever is feasible. We have a
1574 * joining element `sep'.
1575 */
1576 lsub = (lp ? bt_subtrees(bt, lp) : 0);
1577 rsub = (rp ? bt_subtrees(bt, rp) : 0);
1578 if (lp && rp && lsub + rsub <= bt_max_subtrees(bt)) {
1579 node_addr la;
1580 /* Join the nodes into one. */
1581 bt_xform(bt, NODE_AS_IS, 0, lp, rp, sep,
1582 NODE_ADDR_NULL, NODE_ADDR_NULL,
1583 NODE_JOIN, &lp, NULL, NULL);
1584 /* Unlock the node. */
1585 la = bt_write_unlock(bt, lp);
1586 /* Update the child pointer in the next node up. */
1587 if (nnodes > 0)
1588 bt_set_child(bt, nodes[nnodes-1], childposns[nnodes-1], la);
1589 else
1590 bt->root = la;
1591 } else {
1592 node_addr la, ra;
1593 if (!lp || !rp) {
1594 la = NODE_ADDR_NULL;
1595 ra = NODE_ADDR_NULL;
1596 } else {
1597 int lsize, rsize;
1598 /* Re-split the nodes into two plausibly sized ones. */
1599 lsize = lsub + rsub;
1600 rsize = lsize / 2;
1601 lsize -= rsize;
1602 bt_xform(bt, NODE_AS_IS, 0, lp, rp, sep,
1603 NODE_ADDR_NULL, NODE_ADDR_NULL,
1604 lsize-1, &lp, &rp, &sep);
1605 /* Unlock the nodes. */
1606 la = bt_write_unlock(bt, lp);
1607 ra = bt_write_unlock(bt, rp);
1608 }
1609
1610 /*
1611 * Now we have to do the addition thing: progress up the
1612 * tree replacing a single subtree pointer with the
1613 * la/sep/ra assembly, until no more nodes have to split as
1614 * a result.
1615 */
1616 while (nnodes-- > 0) {
1617 nodeptr n = nodes[nnodes];
1618 if (bt_subtrees(bt, n) == bt_max_subtrees(bt)) {
1619 /* Split the node and carry on up. */
1620 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1621 n, NULL, sep, la, ra,
1622 bt_min_subtrees(bt), &lp, &rp, &sep);
1623 la = bt_write_unlock(bt, lp);
1624 ra = bt_write_unlock(bt, rp);
1625 } else {
1626 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1627 n, NULL, sep, la, ra,
1628 NODE_JOIN, &n, NULL, NULL);
1629 bt_write_unlock(bt, n);
1630 break;
1631 }
1632 }
1633
1634 /*
1635 * If nnodes < 0, we have just split the root and we need
1636 * to build a new root node.
1637 */
1638 if (nnodes < 0)
1639 bt_new_root(bt, la, ra, sep);
1640 }
1641
1642 /*
1643 * Now we just need to go back up and unlock any remaining
1644 * nodes. Also here we ensure the root points where it should.
1645 */
1646 while (nnodes-- > 0) {
1647 node_addr na;
1648 na = bt_write_unlock(bt, nodes[nnodes]);
1649 if (nnodes == 0)
1650 bt->root = na;
1651 }
1652
1653 if (nodessize) {
1654 ifree(nodes);
1655 ifree(childposns);
1656 }
1657 }
1658
1659 /*
1660 * External interfaces to the join functionality: join and joinr
1661 * (differing only in which B-tree structure they leave without any
1662 * elements, and which they return the combined tree in).
1663 */
1664 btree *bt_join(btree *bt1, btree *bt2)
1665 {
1666 nodeptr root1, root2;
1667 int size2;
1668
1669 size2 = bt_count(bt2);
1670 if (size2 > 0) {
1671 bt_element_t sep;
1672
1673 if (bt1->cmp) {
1674 /*
1675 * The trees are ordered, so verify the ordering
1676 * condition: ensure nothing in bt1 is greater than or
1677 * equal to the minimum element in bt2.
1678 */
1679 sep = bt_index(bt2, 0);
1680 sep = bt_findrelpos(bt1, sep, NULL, BT_REL_GE, NULL);
1681 if (sep)
1682 return NULL;
1683 }
1684
1685 sep = bt_delpos(bt2, 0);
1686 root1 = bt_write_lock_root(bt1);
1687 root2 = bt_write_lock_root(bt2);
1688 bt_join_internal(bt1, root1, root2, sep, bt1->depth, bt2->depth);
1689 bt2->root = NODE_ADDR_NULL;
1690 bt2->depth = 0;
1691 }
1692 return bt1;
1693 }
1694
1695 btree *bt_joinr(btree *bt1, btree *bt2)
1696 {
1697 nodeptr root1, root2;
1698 int size1;
1699
1700 size1 = bt_count(bt1);
1701 if (size1 > 0) {
1702 bt_element_t sep;
1703
1704 if (bt2->cmp) {
1705 /*
1706 * The trees are ordered, so verify the ordering
1707 * condition: ensure nothing in bt2 is less than or
1708 * equal to the maximum element in bt1.
1709 */
1710 sep = bt_index(bt1, size1-1);
1711 sep = bt_findrelpos(bt2, sep, NULL, BT_REL_LE, NULL);
1712 if (sep)
1713 return NULL;
1714 }
1715
1716 sep = bt_delpos(bt1, size1-1);
1717 root1 = bt_write_lock_root(bt1);
1718 root2 = bt_write_lock_root(bt2);
1719 bt_join_internal(bt2, root1, root2, sep, bt1->depth, bt2->depth);
1720 bt1->root = NODE_ADDR_NULL;
1721 bt1->depth = 0;
1722 }
1723 return bt2;
1724 }
1725
1726 /*
1727 * Perform the healing process after a tree has been split. `rhs'
1728 * is set if the cut edge is the one on the right.
1729 */
1730 static void bt_split_heal(btree *bt, int rhs)
1731 {
1732 nodeptr n;
1733 nodeptr *nodes;
1734 int nnodes;
1735
1736 nodes = inewn(nodeptr, bt->depth);
1737 nnodes = 0;
1738
1739 n = bt_write_lock_root(bt);
1740
1741 /*
1742 * First dispense with completely trivial cases: a root node
1743 * containing only one subtree can be thrown away instantly.
1744 */
1745 while (n && bt_subtrees(bt, n) == 1) {
1746 nodeptr n2 = bt_write_lock_child(bt, n, 0);
1747 bt_shift_root(bt, n, bt_node_addr(bt, n2));
1748 n = n2;
1749 }
1750
1751 /*
1752 * Now we have a plausible root node. Start going down the cut
1753 * edge looking for undersized or minimum nodes, and arranging
1754 * for them to be above minimum size.
1755 */
1756 while (n) {
1757 int edge, next, elt, size_e, size_n, size_total;
1758 nodeptr ne, nn, nl, nr;
1759 bt_element_t el;
1760
1761 nodes[nnodes++] = n;
1762
1763 if (rhs) {
1764 edge = bt_subtrees(bt, n) - 1;
1765 next = edge - 1;
1766 elt = next;
1767 } else {
1768 edge = 0;
1769 next = 1;
1770 elt = edge;
1771 }
1772
1773 ne = bt_write_lock_child(bt, n, edge);
1774 if (!ne)
1775 break;
1776
1777 size_e = bt_subtrees(bt, ne);
1778
1779 if (size_e <= bt_min_subtrees(bt)) {
1780 nn = bt_write_lock_child(bt, n, next);
1781 el = bt_element(bt, n, elt);
1782 size_n = bt_subtrees(bt, nn);
1783 if (edge < next)
1784 nl = ne, nr = nn;
1785 else
1786 nl = nn, nr = ne;
1787 size_total = size_e + size_n;
1788 if (size_e + size_n <= bt_max_subtrees(bt)) {
1789 /*
1790 * Merge the edge node and its sibling together.
1791 */
1792 bt_xform(bt, NODE_AS_IS, 0, nl, nr, el,
1793 NODE_ADDR_NULL, NODE_ADDR_NULL,
1794 NODE_JOIN, &ne, NULL, NULL);
1795 bt_xform(bt, NODE_DEL_ELT, elt, n, NULL, NULL,
1796 bt_node_addr(bt, ne), NODE_ADDR_NULL,
1797 NODE_JOIN, &n, NULL, NULL);
1798 /*
1799 * It's possible we've just trashed the root of the
1800 * tree, again.
1801 */
1802 if (bt_subtrees(bt, n) == 1) {
1803 bt_shift_root(bt, n, bt_node_addr(bt, ne));
1804 nnodes--; /* and take it out of nodes[] */
1805 }
1806 } else {
1807 /*
1808 * Redistribute subtrees between the edge node and
1809 * its sibling.
1810 */
1811 int split;
1812 size_e = (size_total + 1) / 2;
1813 assert(size_e > bt_min_subtrees(bt));
1814 if (next < edge)
1815 split = size_total - size_e - 1;
1816 else
1817 split = size_e - 1;
1818 bt_xform(bt, NODE_AS_IS, 0, nl, nr, el,
1819 NODE_ADDR_NULL, NODE_ADDR_NULL,
1820 split, &nl, &nr, &el);
1821 bt_write_unlock(bt, nn);
1822 bt_set_element(bt, n, elt, el);
1823 }
1824 }
1825
1826 n = ne;
1827 }
1828
1829 /*
1830 * Now we just need to go back up and unlock any remaining
1831 * nodes.
1832 */
1833 while (nnodes-- > 0)
1834 bt_write_unlock(bt, nodes[nnodes]);
1835
1836 ifree(nodes);
1837 }
1838
1839 /*
1840 * Split a tree by numeric position. The new tree returned is the
1841 * one on the right; the original tree contains the stuff on the
1842 * left.
1843 */
1844 static btree *bt_split_internal(btree *bt1, int index)
1845 {
1846 btree *bt2;
1847 nodeptr *lnodes, *rnodes;
1848 nodeptr n1, n2, n;
1849 int nnodes, child;
1850
1851 bt2 = bt_new(bt1->cmp, bt1->copy, bt1->freeelt, bt1->propsize,
1852 bt1->propalign, bt1->propmake, bt1->propmerge,
1853 bt1->userstate, bt1->mindegree);
1854 bt2->depth = bt1->depth;
1855
1856 lnodes = inewn(nodeptr, bt1->depth);
1857 rnodes = inewn(nodeptr, bt2->depth);
1858 nnodes = 0;
1859
1860 n1 = bt_write_lock_root(bt1);
1861 while (n1) {
1862 child = bt_lookup_pos(bt1, n1, &index, NULL);
1863 n = bt_write_lock_child(bt1, n1, child);
1864 bt_xform(bt1, NODE_ADD_ELT, child, n1, NULL, NULL,
1865 bt_node_addr(bt1, n), NODE_ADDR_NULL,
1866 child, &n1, &n2, NULL);
1867 lnodes[nnodes] = n1;
1868 rnodes[nnodes] = n2;
1869 if (nnodes > 0)
1870 bt_set_child(bt2, rnodes[nnodes-1], 0, bt_node_addr(bt2, n2));
1871 else
1872 bt2->root = bt_node_addr(bt2, n2);
1873 nnodes++;
1874 n1 = n;
1875 }
1876
1877 /*
1878 * Now we go back up and unlock all the nodes. At this point we
1879 * don't mess with user properties, because there's the danger
1880 * of a node containing no subtrees _or_ elements and hence us
1881 * having to invent a notation for an empty property. We're
1882 * going to make a second healing pass in a moment anyway,
1883 * which will sort all that out for us.
1884 */
1885 while (nnodes-- > 0) {
1886 bt_write_unlock_internal(bt1, lnodes[nnodes], FALSE);
1887 bt_write_unlock_internal(bt2, rnodes[nnodes], FALSE);
1888 }
1889
1890 /*
1891 * Then we make a healing pass down each side of the tree.
1892 */
1893 bt_split_heal(bt1, TRUE);
1894 bt_split_heal(bt2, FALSE);
1895
1896 ifree(lnodes);
1897 ifree(rnodes);
1898
1899 return bt2;
1900 }
1901
1902 /*
1903 * Split a tree at a numeric index.
1904 */
1905 btree *bt_splitpos(btree *bt, int index, int before)
1906 {
1907 btree *ret;
1908 node_addr na;
1909 int count, nd;
1910 nodeptr n;
1911
1912 n = bt_read_lock_root(bt);
1913 count = (n ? bt_node_count(bt, n) : 0);
1914 bt_read_unlock(bt, n);
1915
1916 if (index < 0 || index > count)
1917 return NULL;
1918
1919 ret = bt_split_internal(bt, index);
1920 if (before) {
1921 na = bt->root;
1922 bt->root = ret->root;
1923 ret->root = na;
1924
1925 nd = bt->depth;
1926 bt->depth = ret->depth;
1927 ret->depth = nd;
1928 }
1929 return ret;
1930 }
1931
1932 /*
1933 * Split a tree at a position dictated by the sorting order.
1934 */
1935 btree *bt_split(btree *bt, bt_element_t element, cmpfn_t cmp, int rel)
1936 {
1937 int before, index;
1938
1939 assert(rel != BT_REL_EQ); /* has to be an inequality */
1940
1941 if (rel == BT_REL_GT || rel == BT_REL_GE) {
1942 before = TRUE;
1943 rel = (rel == BT_REL_GT ? BT_REL_LE : BT_REL_LT);
1944 } else {
1945 before = FALSE;
1946 }
1947 if (!bt_findrelpos(bt, element, cmp, rel, &index))
1948 index = -1;
1949 return bt_splitpos(bt, index+1, before);
1950 }
1951
1952 #ifdef TEST
1953
1954 #define TEST_DEGREE 4
1955 #define BT_COPY bt_clone
1956 #define MAXTREESIZE 10000
1957 #define MAXLOCKS 100
1958
1959 int errors;
1960
1961 /*
1962 * Error reporting function.
1963 */
1964 void error(char *fmt, ...) {
1965 va_list ap;
1966 fprintf(stderr, "ERROR: ");
1967 va_start(ap, fmt);
1968 vfprintf(stderr, fmt, ap);
1969 va_end(ap);
1970 fprintf(stderr, "\n");
1971 errors++;
1972 }
1973
1974 /*
1975 * See if a tree has a 2-element root node.
1976 */
1977 static int bt_tworoot(btree *bt)
1978 {
1979 nodeptr n;
1980 int i;
1981 n = bt_read_lock_root(bt);
1982 i = bt_subtrees(bt, n);
1983 bt_read_unlock(bt, n);
1984 return (i == 2 ? TRUE : FALSE);
1985 }
1986
1987 /*
1988 * Physically copy an entire B-tree. (NB this appears as a test
1989 * routine rather than a production one, since reference counting
1990 * and bt_clone() provide a better way to do this for real code. If
1991 * anyone really needs a genuine physical copy for anything other
1992 * than testing reasons, I suppose they could always lift this into
1993 * the admin section above.)
1994 */
1995
1996 static nodeptr bt_copy_node(btree *bt, nodeptr n)
1997 {
1998 int i, children;
1999 nodeptr ret;
2000
2001 children = bt_subtrees(bt, n);
2002 ret = bt_new_node(bt, children);
2003
2004 for (i = 0; i < children; i++) {
2005 nodeptr n2 = bt_read_lock_child(bt, n, i);
2006 nodeptr n3;
2007 if (n2) {
2008 n3 = bt_copy_node(bt, n2);
2009 bt_set_child(bt, ret, i, bt_write_unlock(bt, n3));
2010 } else {
2011 bt_set_child(bt, ret, i, NODE_ADDR_NULL);
2012 }
2013 bt_read_unlock(bt, n2);
2014
2015 if (i < children-1) {
2016 bt_element_t e = bt_element(bt, n, i);
2017 if (bt->copy)
2018 e = bt->copy(bt->userstate, e);
2019 bt_set_element(bt, ret, i, e);
2020 }
2021 }
2022
2023 return ret;
2024 }
2025
2026 btree *bt_copy(btree *bt)
2027 {
2028 nodeptr n;
2029 btree *bt2;
2030
2031 bt2 = bt_new(bt->cmp, bt->copy, bt->freeelt, bt->propsize, bt->propalign,
2032 bt->propmake, bt->propmerge, bt->userstate, bt->mindegree);
2033 bt2->depth = bt->depth;
2034
2035 n = bt_read_lock_root(bt);
2036 if (n)
2037 bt2->root = bt_write_unlock(bt2, bt_copy_node(bt, n));
2038 bt_read_unlock(bt, n);
2039
2040 return bt2;
2041 }
2042
2043 /*
2044 * This function is intended to be called from gdb when debugging
2045 * things.
2046 */
2047 void bt_dump_nodes(btree *bt, ...)
2048 {
2049 int i, children;
2050 va_list ap;
2051 nodeptr n;
2052
2053 va_start(ap, bt);
2054 while (1) {
2055 n = va_arg(ap, nodeptr);
2056 if (!n)
2057 break;
2058 printf("%p [%d]:", n, n[bt->maxdegree*2+1].i);
2059 children = bt_subtrees(bt, n);
2060 for (i = 0; i < children; i++) {
2061 printf(" %p", bt_child(bt, n, i).p);
2062 if (i < children-1)
2063 printf(" %s", (char *)bt_element(bt, n, i));
2064 }
2065 printf("\n");
2066 }
2067 va_end(ap);
2068 }
2069
2070 /*
2071 * Verify a tree against an array. Checks that:
2072 *
2073 * - every node has a valid number of subtrees
2074 * - subtrees are either all present (internal node) or all absent
2075 * (leaf)
2076 * - elements are all present
2077 * - every leaf is at exactly the depth claimed by the tree
2078 * - the tree represents the correct list of elements in the
2079 * correct order. (This also tests the ordering constraint,
2080 * assuming the array is correctly constructed.)
2081 */
2082
2083 void verifynode(btree *bt, nodeptr n, bt_element_t *array, int *arraypos,
2084 int depth)
2085 {
2086 int subtrees, min, max, i, before, after, count;
2087
2088 /* Check the subtree count. The root can have as few as 2 subtrees. */
2089 subtrees = bt_subtrees(bt, n);
2090 max = bt_max_subtrees(bt);
2091 min = (depth == 1) ? 2 : bt_min_subtrees(bt);
2092 if (subtrees > max)
2093 error("node %p has too many subtrees (%d > %d)", n, subtrees, max);
2094 if (subtrees < min)
2095 error("node %p has too few subtrees (%d < %d)", n, subtrees, min);
2096
2097 /* Check that subtrees are present or absent as required. */
2098 for (i = 0; i < subtrees; i++) {
2099 node_addr child = bt_child(bt, n, i);
2100 if (depth == bt->depth && child.p != NULL)
2101 error("leaf node %p child %d is %p not NULL\n", n, i, child);
2102 if (depth != bt->depth && child.p == NULL)
2103 error("non-leaf node %p child %d is NULL\n", n, i);
2104 }
2105
2106 /* Check that elements are all present. */
2107 for (i = 0; i < subtrees-1; i++) {
2108 bt_element_t elt = bt_element(bt, n, i);
2109 if (elt == NULL)
2110 error("node %p element %d is NULL\n", n, i);
2111 }
2112
2113 before = *arraypos;
2114
2115 /* Now verify the subtrees, and simultaneously check the ordering. */
2116 for (i = 0; i < subtrees; i++) {
2117 if (depth < bt->depth) {
2118 nodeptr child = bt_read_lock_child(bt, n, i);
2119 verifynode(bt, child, array, arraypos, depth+1);
2120 bt_read_unlock(bt, child);
2121 }
2122 if (i < subtrees-1) {
2123 bt_element_t elt = bt_element(bt, n, i);
2124 if (array[*arraypos] != elt) {
2125 error("node %p element %d is \"%s\", but array[%d]=\"%s\"",
2126 n, i, elt, *arraypos, array[*arraypos]);
2127 }
2128 (*arraypos)++;
2129 }
2130 }
2131
2132 after = *arraypos;
2133
2134 /* Check the node count. */
2135 count = bt_node_count(bt, n);
2136 if (count != after - before)
2137 error("node %p count is %d, should be %d", n, count, after - before);
2138
2139 /*
2140 * Check the user properties.
2141 */
2142 {
2143 nodecomponent *prop;
2144 int i;
2145 int max = 0, total = 0;
2146
2147 prop = n + bt->maxdegree * 2 + 2;
2148
2149 for (i = before; i < after; i++) {
2150 int c = (unsigned char)*(char *)array[i];
2151
2152 if (max < c) max = c;
2153 total += c;
2154 }
2155
2156 if (prop[0].i != total)
2157 error("node %p total prop is %d, should be %d", n,
2158 prop[0].i, total);
2159 if (prop[1].i != max)
2160 error("node %p max prop is %d, should be %d", n,
2161 prop[1].i, max);
2162 }
2163 }
2164
2165 void verifytree(btree *bt, bt_element_t *array, int arraylen)
2166 {
2167 nodeptr n;
2168 int i = 0;
2169 n = bt_read_lock_root(bt);
2170 if (n) {
2171 verifynode(bt, n, array, &i, 1);
2172 bt_read_unlock(bt, n);
2173 } else {
2174 if (bt->depth != 0) {
2175 error("tree has null root but depth is %d not zero", bt->depth);
2176 }
2177 }
2178 if (i != arraylen)
2179 error("tree contains %d elements, array contains %d",
2180 i, arraylen);
2181 testlock(-1, 0, NULL);
2182 }
2183
2184 int mycmp(void *state, void *av, void *bv) {
2185 char const *a = (char const *)av;
2186 char const *b = (char const *)bv;
2187 return strcmp(a, b);
2188 }
2189
2190 static void set_invalid_property(void *propv)
2191 {
2192 int *prop = (int *)propv;
2193 prop[0] = prop[1] = -1;
2194 }
2195
2196 void mypropmake(void *state, void *av, void *destv)
2197 {
2198 char const *a = (char const *)av;
2199 int *dest = (int *)destv;
2200 dest[0] = dest[1] = (unsigned char)*a;
2201 }
2202
2203 void mypropmerge(void *state, void *s1v, void *s2v, void *destv)
2204 {
2205 int *s1 = (int *)s1v;
2206 int *s2 = (int *)s2v;
2207 int *dest = (int *)destv;
2208 if (!s1v && !s2v) {
2209 /* Special `destroy' case. */
2210 set_invalid_property(destv);
2211 return;
2212 }
2213 assert(s2[0] >= 0 && s2[1] >= 0);
2214 assert(s1 == NULL || (s1[0] >= 0 && s1[1] >= 0));
2215 dest[0] = s2[0] + (s1 ? s1[0] : 0);
2216 dest[1] = (s1 && s1[1] > s2[1] ? s1[1] : s2[1]);
2217 }
2218
2219 void array_addpos(bt_element_t *array, int *arraylen, bt_element_t e, int i)
2220 {
2221 bt_element_t e2;
2222 int len = *arraylen;
2223
2224 assert(len < MAXTREESIZE);
2225
2226 while (i < len) {
2227 e2 = array[i];
2228 array[i] = e;
2229 e = e2;
2230 i++;
2231 }
2232 array[len] = e;
2233 *arraylen = len+1;
2234 }
2235
2236 void array_add(bt_element_t *array, int *arraylen, bt_element_t e)
2237 {
2238 int i;
2239 int len = *arraylen;
2240
2241 for (i = 0; i < len; i++)
2242 if (mycmp(NULL, array[i], e) >= 0)
2243 break;
2244 assert(i == len || mycmp(NULL, array[i], e) != 0);
2245 array_addpos(array, arraylen, e, i);
2246 }
2247
2248 void array_delpos(bt_element_t *array, int *arraylen, int i)
2249 {
2250 int len = *arraylen;
2251
2252 while (i < len-1) {
2253 array[i] = array[i+1];
2254 i++;
2255 }
2256 *arraylen = len-1;
2257 }
2258
2259 bt_element_t array_del(bt_element_t *array, int *arraylen, bt_element_t e)
2260 {
2261 int i;
2262 int len = *arraylen;
2263 bt_element_t ret;
2264
2265 for (i = 0; i < len; i++)
2266 if (mycmp(NULL, array[i], e) >= 0)
2267 break;
2268 if (i < len && mycmp(NULL, array[i], e) == 0) {
2269 ret = array[i];
2270 array_delpos(array, arraylen, i);
2271 } else
2272 ret = NULL;
2273 return ret;
2274 }
2275
2276 /* A sample data set and test utility. Designed for pseudo-randomness,
2277 * and yet repeatability. */
2278
2279 /*
2280 * This random number generator uses the `portable implementation'
2281 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
2282 * change it if not.
2283 */
2284 int randomnumber(unsigned *seed) {
2285 *seed *= 1103515245;
2286 *seed += 12345;
2287 return ((*seed) / 65536) % 32768;
2288 }
2289
2290 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
2291
2292 char *strings[] = {
2293 "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
2294 "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
2295 "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
2296 "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
2297 "m", "s", "l", "4",
2298 };
2299
2300 #define NSTR lenof(strings)
2301
2302 void findtest(btree *tree, bt_element_t *array, int arraylen)
2303 {
2304 static const int rels[] = {
2305 BT_REL_EQ, BT_REL_GE, BT_REL_LE, BT_REL_LT, BT_REL_GT
2306 };
2307 static const char *const relnames[] = {
2308 "EQ", "GE", "LE", "LT", "GT"
2309 };
2310 int i, j, rel, index;
2311 char *p, *ret, *realret, *realret2;
2312 int lo, hi, mid, c;
2313
2314 for (i = 0; i < (int)NSTR; i++) {
2315 p = strings[i];
2316 for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
2317 rel = rels[j];
2318
2319 lo = 0; hi = arraylen-1;
2320 while (lo <= hi) {
2321 mid = (lo + hi) / 2;
2322 c = strcmp(p, array[mid]);
2323 if (c < 0)
2324 hi = mid-1;
2325 else if (c > 0)
2326 lo = mid+1;
2327 else
2328 break;
2329 }
2330
2331 if (c == 0) {
2332 if (rel == BT_REL_LT)
2333 ret = (mid > 0 ? array[--mid] : NULL);
2334 else if (rel == BT_REL_GT)
2335 ret = (mid < arraylen-1 ? array[++mid] : NULL);
2336 else
2337 ret = array[mid];
2338 } else {
2339 assert(lo == hi+1);
2340 if (rel == BT_REL_LT || rel == BT_REL_LE) {
2341 mid = hi;
2342 ret = (hi >= 0 ? array[hi] : NULL);
2343 } else if (rel == BT_REL_GT || rel == BT_REL_GE) {
2344 mid = lo;
2345 ret = (lo < arraylen ? array[lo] : NULL);
2346 } else
2347 ret = NULL;
2348 }
2349
2350 realret = bt_findrelpos(tree, p, NULL, rel, &index);
2351 testlock(-1, 0, NULL);
2352 if (realret != ret) {
2353 error("find(\"%s\",%s) gave %s should be %s",
2354 p, relnames[j], realret, ret);
2355 }
2356 if (realret && index != mid) {
2357 error("find(\"%s\",%s) gave %d should be %d",
2358 p, relnames[j], index, mid);
2359 }
2360 if (realret && rel == BT_REL_EQ) {
2361 realret2 = bt_index(tree, index);
2362 if (realret2 != realret) {
2363 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
2364 p, relnames[j], realret, index, index, realret2);
2365 }
2366 }
2367 }
2368 }
2369
2370 realret = bt_findrelpos(tree, NULL, NULL, BT_REL_GT, &index);
2371 testlock(-1, 0, NULL);
2372 if (arraylen && (realret != array[0] || index != 0)) {
2373 error("find(NULL,GT) gave %s(%d) should be %s(0)",
2374 realret, index, array[0]);
2375 } else if (!arraylen && (realret != NULL)) {
2376 error("find(NULL,GT) gave %s(%d) should be NULL",
2377 realret, index);
2378 }
2379
2380 realret = bt_findrelpos(tree, NULL, NULL, BT_REL_LT, &index);
2381 testlock(-1, 0, NULL);
2382 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
2383 error("find(NULL,LT) gave %s(%d) should be %s(0)",
2384 realret, index, array[arraylen-1]);
2385 } else if (!arraylen && (realret != NULL)) {
2386 error("find(NULL,LT) gave %s(%d) should be NULL",
2387 realret, index);
2388 }
2389 }
2390
2391 void splittest(btree *tree, bt_element_t *array, int arraylen)
2392 {
2393 int i;
2394 btree *tree3, *tree4;
2395 for (i = 0; i <= arraylen; i++) {
2396 printf("splittest: %d\n", i);
2397 tree3 = BT_COPY(tree);
2398 testlock(-1, 0, NULL);
2399 tree4 = bt_splitpos(tree3, i, 0);
2400 testlock(-1, 0, NULL);
2401 verifytree(tree3, array, i);
2402 verifytree(tree4, array+i, arraylen-i);
2403 bt_join(tree3, tree4);
2404 testlock(-1, 0, NULL);
2405 verifytree(tree4, NULL, 0);
2406 bt_free(tree4); /* left empty by join */
2407 testlock(-1, 0, NULL);
2408 verifytree(tree3, array, arraylen);
2409 bt_free(tree3);
2410 testlock(-1, 0, NULL);
2411 }
2412 }
2413
2414 /*
2415 * Called to track read and write locks on nodes.
2416 */
2417 void testlock(int write, int set, nodeptr n)
2418 {
2419 static nodeptr readlocks[MAXLOCKS], writelocks[MAXLOCKS];
2420 static int nreadlocks = 0, nwritelocks = 0;
2421
2422 int i, rp, wp;
2423
2424 if (write == -1) {
2425 /* Called after an operation to ensure all locks are unlocked. */
2426 if (nreadlocks != 0 || nwritelocks != 0)
2427 error("at least one left-behind lock exists!");
2428 return;
2429 }
2430
2431 /* Locking NULL does nothing. Unlocking it is an error. */
2432 if (n == NULL) {
2433 if (!set)
2434 error("attempting to %s-unlock NULL", write ? "write" : "read");
2435 return;
2436 }
2437
2438 assert(nreadlocks < MAXLOCKS && nwritelocks < MAXLOCKS);
2439
2440 /* First look for the node in both lock lists. */
2441 rp = wp = -1;
2442 for (i = 0; i < nreadlocks; i++)
2443 if (readlocks[i] == n)
2444 rp = i;
2445 for (i = 0; i < nwritelocks; i++)
2446 if (writelocks[i] == n)
2447 wp = i;
2448
2449 /* Now diverge based on what we're supposed to be up to. */
2450 if (set) {
2451 /* Setting a lock. Should not already be locked in either list. */
2452 if (rp != -1 || wp != -1) {
2453 error("attempt to %s-lock node %p, already %s-locked",
2454 (write ? "write" : "read"), n, (rp==-1 ? "write" : "read"));
2455 }
2456 if (write)
2457 writelocks[nwritelocks++] = n;
2458 else
2459 readlocks[nreadlocks++] = n;
2460 } else {
2461 /* Clearing a lock. Should exist in exactly the correct list. */
2462 if (write && rp != -1)
2463 error("attempt to write-unlock node %p which is read-locked", n);
2464 if (!write && wp != -1)
2465 error("attempt to read-unlock node %p which is write-locked", n);
2466 if (wp != -1) {
2467 nwritelocks--;
2468 for (i = wp; i < nwritelocks; i++)
2469 writelocks[i] = writelocks[i+1];
2470 }
2471 if (rp != -1) {
2472 nreadlocks--;
2473 for (i = rp; i < nreadlocks; i++)
2474 readlocks[i] = readlocks[i+1];
2475 }
2476 }
2477 }
2478
2479 int main(void) {
2480 int in[NSTR];
2481 int i, j, k;
2482 int tworoot, tmplen;
2483 unsigned seed = 0;
2484 bt_element_t *array;
2485 int arraylen;
2486 bt_element_t ret, ret2, item;
2487 btree *tree, *tree2, *tree3, *tree4;
2488
2489 setvbuf(stdout, NULL, _IOLBF, 0);
2490 setvbuf(stderr, NULL, _IOLBF, 0);
2491 errors = 0;
2492
2493 for (i = 0; i < (int)NSTR; i++) in[i] = 0;
2494 array = newn(bt_element_t, MAXTREESIZE);
2495 arraylen = 0;
2496 tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2497 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2498
2499 verifytree(tree, array, arraylen);
2500 for (i = 0; i < 10000; i++) {
2501 j = randomnumber(&seed);
2502 j %= NSTR;
2503 printf("trial: %d\n", i);
2504 if (in[j]) {
2505 printf("deleting %s (%d)\n", strings[j], j);
2506 ret2 = array_del(array, &arraylen, strings[j]);
2507 ret = bt_del(tree, strings[j]);
2508 testlock(-1, 0, NULL);
2509 assert((bt_element_t)strings[j] == ret && ret == ret2);
2510 verifytree(tree, array, arraylen);
2511 in[j] = 0;
2512 } else {
2513 printf("adding %s (%d)\n", strings[j], j);
2514 array_add(array, &arraylen, strings[j]);
2515 ret = bt_add(tree, strings[j]);
2516 testlock(-1, 0, NULL);
2517 assert(strings[j] == ret);
2518 verifytree(tree, array, arraylen);
2519 in[j] = 1;
2520 }
2521 /* disptree(tree); */
2522 findtest(tree, array, arraylen);
2523 }
2524
2525 while (arraylen > 0) {
2526 j = randomnumber(&seed);
2527 j %= arraylen;
2528 item = array[j];
2529 ret2 = array_del(array, &arraylen, item);
2530 ret = bt_del(tree, item);
2531 testlock(-1, 0, NULL);
2532 assert(ret2 == ret);
2533 verifytree(tree, array, arraylen);
2534 }
2535
2536 bt_free(tree);
2537 testlock(-1, 0, NULL);
2538
2539 /*
2540 * Now try an unsorted tree. We don't really need to test
2541 * delpos because we know del is based on it, so it's already
2542 * been tested in the above sorted-tree code; but for
2543 * completeness we'll use it to tear down our unsorted tree
2544 * once we've built it.
2545 */
2546 tree = bt_new(NULL, NULL, NULL, 2*sizeof(int), alignof(int),
2547 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2548 verifytree(tree, array, arraylen);
2549 for (i = 0; i < 1000; i++) {
2550 printf("trial: %d\n", i);
2551 j = randomnumber(&seed);
2552 j %= NSTR;
2553 k = randomnumber(&seed);
2554 k %= bt_count(tree)+1;
2555 testlock(-1, 0, NULL);
2556 printf("adding string %s at index %d\n", strings[j], k);
2557 array_addpos(array, &arraylen, strings[j], k);
2558 bt_addpos(tree, strings[j], k);
2559 testlock(-1, 0, NULL);
2560 verifytree(tree, array, arraylen);
2561 }
2562
2563 /*
2564 * While we have this tree in its full form, we'll take a copy
2565 * of it to use in split and join testing.
2566 */
2567 tree2 = BT_COPY(tree);
2568 testlock(-1, 0, NULL);
2569 verifytree(tree2, array, arraylen);/* check the copy is accurate */
2570 /*
2571 * Split tests. Split the tree at every possible point and
2572 * check the resulting subtrees.
2573 */
2574 tworoot = bt_tworoot(tree2); /* see if it has a 2-root */
2575 testlock(-1, 0, NULL);
2576 splittest(tree2, array, arraylen);
2577 /*
2578 * Now do the split test again, but on a tree that has a 2-root
2579 * (if the previous one didn't) or doesn't (if the previous one
2580 * did).
2581 */
2582 tmplen = arraylen;
2583 while (bt_tworoot(tree2) == tworoot) {
2584 bt_delpos(tree2, --tmplen);
2585 testlock(-1, 0, NULL);
2586 }
2587 printf("now trying splits on second tree\n");
2588 splittest(tree2, array, tmplen);
2589 bt_free(tree2);
2590 testlock(-1, 0, NULL);
2591
2592 /*
2593 * Back to the main testing of uncounted trees.
2594 */
2595 while (bt_count(tree) > 0) {
2596 printf("cleanup: tree size %d\n", bt_count(tree));
2597 j = randomnumber(&seed);
2598 j %= bt_count(tree);
2599 printf("deleting string %s from index %d\n", (char *)array[j], j);
2600 ret = bt_delpos(tree, j);
2601 testlock(-1, 0, NULL);
2602 assert((bt_element_t)array[j] == ret);
2603 array_delpos(array, &arraylen, j);
2604 verifytree(tree, array, arraylen);
2605 }
2606 bt_free(tree);
2607 testlock(-1, 0, NULL);
2608
2609 /*
2610 * Finally, do some testing on split/join on _sorted_ trees. At
2611 * the same time, we'll be testing split on very small trees.
2612 */
2613 tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2614 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2615 arraylen = 0;
2616 for (i = 0; i < 16; i++) {
2617 array_add(array, &arraylen, strings[i]);
2618 ret = bt_add(tree, strings[i]);
2619 testlock(-1, 0, NULL);
2620 assert(strings[i] == ret);
2621 verifytree(tree, array, arraylen);
2622 tree2 = BT_COPY(tree);
2623 splittest(tree2, array, arraylen);
2624 testlock(-1, 0, NULL);
2625 bt_free(tree2);
2626 testlock(-1, 0, NULL);
2627 }
2628 bt_free(tree);
2629 testlock(-1, 0, NULL);
2630
2631 /*
2632 * Test silly cases of join: join(emptytree, emptytree), and
2633 * also ensure join correctly spots when sorted trees fail the
2634 * ordering constraint.
2635 */
2636 tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2637 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2638 tree2 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2639 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2640 tree3 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2641 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2642 tree4 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2643 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2644 assert(mycmp(NULL, strings[0], strings[1]) < 0); /* just in case :-) */
2645 bt_add(tree2, strings[1]);
2646 testlock(-1, 0, NULL);
2647 bt_add(tree4, strings[0]);
2648 testlock(-1, 0, NULL);
2649 array[0] = strings[0];
2650 array[1] = strings[1];
2651 verifytree(tree, array, 0);
2652 verifytree(tree2, array+1, 1);
2653 verifytree(tree3, array, 0);
2654 verifytree(tree4, array, 1);
2655
2656 /*
2657 * So:
2658 * - join(tree,tree3) should leave both tree and tree3 unchanged.
2659 * - joinr(tree,tree2) should leave both tree and tree2 unchanged.
2660 * - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
2661 * - join(tree, tree2) should move the element from tree2 to tree.
2662 * - joinr(tree4, tree3) should move the element from tree4 to tree3.
2663 * - join(tree,tree3) should return NULL and leave both unchanged.
2664 * - join(tree3,tree) should work and create a bigger tree in tree3.
2665 */
2666 assert(tree == bt_join(tree, tree3));
2667 testlock(-1, 0, NULL);
2668 verifytree(tree, array, 0);
2669 verifytree(tree3, array, 0);
2670 assert(tree2 == bt_joinr(tree, tree2));
2671 testlock(-1, 0, NULL);
2672 verifytree(tree, array, 0);
2673 verifytree(tree2, array+1, 1);
2674 assert(tree4 == bt_join(tree4, tree3));
2675 testlock(-1, 0, NULL);
2676 verifytree(tree3, array, 0);
2677 verifytree(tree4, array, 1);
2678 assert(tree == bt_join(tree, tree2));
2679 testlock(-1, 0, NULL);
2680 verifytree(tree, array+1, 1);
2681 verifytree(tree2, array, 0);
2682 assert(tree3 == bt_joinr(tree4, tree3));
2683 testlock(-1, 0, NULL);
2684 verifytree(tree3, array, 1);
2685 verifytree(tree4, array, 0);
2686 assert(NULL == bt_join(tree, tree3));
2687 testlock(-1, 0, NULL);
2688 verifytree(tree, array+1, 1);
2689 verifytree(tree3, array, 1);
2690 assert(tree3 == bt_join(tree3, tree));
2691 testlock(-1, 0, NULL);
2692 verifytree(tree3, array, 2);
2693 verifytree(tree, array, 0);
2694
2695 bt_free(tree);
2696 testlock(-1, 0, NULL);
2697 bt_free(tree2);
2698 testlock(-1, 0, NULL);
2699 bt_free(tree3);
2700 testlock(-1, 0, NULL);
2701 bt_free(tree4);
2702 testlock(-1, 0, NULL);
2703
2704 sfree(array);
2705
2706 if (errors)
2707 fprintf(stderr, "%d errors!\n", errors);
2708 return (errors != 0 ? 1 : 0);
2709 }
2710
2711 #endif