mdw@git.distorted.org.uk Git - sgt/library/blob - btree.c

   1 /*
   2  * Flexible B-tree implementation. Supports reference counting for
   3  * copy-on-write, user-defined node properties, and variable
   4  * degree.
   5  *
   6  * This file is copyright 2001,2004 Simon Tatham.
   7  *
   8  * Permission is hereby granted, free of charge, to any person
   9  * obtaining a copy of this software and associated documentation
  10  * files (the "Software"), to deal in the Software without
  11  * restriction, including without limitation the rights to use,
  12  * copy, modify, merge, publish, distribute, sublicense, and/or
  13  * sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following
  15  * conditions:
  16  *
  17  * The above copyright notice and this permission notice shall be
  18  * included in all copies or substantial portions of the Software.
  19  *
  20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  22  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23  * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
  24  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  25  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  26  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27  * SOFTWARE.
  28  */
  29
  30 /*
  31  * TODO:
  32  *
  33  * Possibly TODO in future, but may not be sensible in this code
  34  * architecture:
  35  *
  36  *  - user write properties.
  37  *     * this all happens during write_unlock(), I think. Except
  38  *       that we'll now need an _internal_ write_unlock() which
  39  *       does everything except user write properties. Sigh.
  40  *     * note that we also need a transform function for elements
  41  *       (rot13 will certainly require this, and reverse will
  42  *       require it if the elements themselves are in some way
  43  *       reversible).
  44  *
  45  * Still untested:
  46  *  - searching on user read properties.
  47  *  - user-supplied copy function.
  48  *  - bt_add when element already exists.
  49  *  - bt_del when element doesn't.
  50  *  - splitpos with before==TRUE.
  51  *  - split() on sorted elements (but it should be fine).
  52  *  - bt_replace, at all (it won't be useful until we get user read
  53  *    properties).
  54  *  - bt_index_w (won't make much sense until we start using
  55  *    user-supplied copy fn).
  56  */
  57
  58 #include <stdlib.h>
  59 #include <string.h>
  60 #include <assert.h>
  61
  62 #ifdef TEST
  63 #include <stdio.h>
  64 #include <stdarg.h>
  65 #endif
  66
  67 #include "btree.h"
  68
  69 #ifdef TEST
  70 static void set_invalid_property(void *prop);
  71 #endif
  72
  73 /* ----------------------------------------------------------------------
  74  * Type definitions.
  75  */
  76
  77 typedef union nodecomponent nodecomponent;
  78 typedef nodecomponent *nodeptr;
  79
  80 /*
  81  * For type-checking purposes, and to ensure I don't accidentally
  82  * confuse node_addr with node_ptr during implementation, I'll
  83  * define node_addr for the in-memory case as being a struct
  84  * containing only a nodeptr.
  85  *
  86  * This unfortunately needs to go in btree.h so that clients
  87  * writing user properties can know about the nodecomponent
  88  * structure.
  89  */
  90 typedef struct {
  91     nodeptr p;
  92 } node_addr;
  93
  94 /*
  95  * A B-tree node is a horrible thing when you're trying to be
  96  * flexible. It is of variable size, and it contains a variety of
  97  * distinct types of thing: nodes, elements, some counters, some
  98  * user-defined properties ... it's a horrible thing. So we define
  99  * it as an array of unions, each union being either an `int' or a
 100  * `bt_element_t' or a `node_addr'...
 101  */
 102
 103 union nodecomponent {
 104     int i;
 105     node_addr na;
 106     bt_element_t ep;
 107 };
 108
 109 static const node_addr NODE_ADDR_NULL = { NULL };
 110
 111 /*
 112  * The array of nodecomponents will take the following form:
 113  *
 114  *  - (maxdegree) child pointers.
 115  *  - (maxdegree-1) element pointers.
 116  *  - one subtree count (current number of child pointers that are
 117  *    valid; note that `valid' doesn't imply non-NULL).
 118  *  - one element count.
 119  *  - one reference count.
 120  */
 121
 122 struct btree {
 123     int mindegree;                     /* min number of subtrees */
 124     int maxdegree;                     /* max number of subtrees */
 125     int depth;                         /* helps to store this explicitly */
 126     node_addr root;
 127     cmpfn_t cmp;
 128     copyfn_t copy;
 129     freefn_t freeelt;
 130     int propsize, propalign, propoffset;
 131     propmakefn_t propmake;
 132     propmergefn_t propmerge;
 133     void *userstate;                   /* passed to all user functions */
 134 };
 135
 136 /* ----------------------------------------------------------------------
 137  * Memory management routines and other housekeeping.
 138  */
 139 #ifdef HAVE_ALLOCA
 140 #  define ialloc(x) alloca(x)
 141 #  define ifree(x)
 142 #else
 143 #  define ialloc(x) smalloc(x)
 144 #  define ifree(x) sfree(x)
 145 #endif
 146
 147 #define new1(t) ( (t *) smalloc(sizeof(t)) )
 148 #define newn(t, n) ( (t *) smalloc((n) * sizeof(t)) )
 149 #define inew1(t) ( (t *) ialloc(sizeof(t)) )
 150 #define inewn(t, n) ( (t *) ialloc((n) * sizeof(t)) )
 151
 152 static void *smalloc(size_t size)
 153 {
 154     void *ret = malloc(size);
 155     if (!ret)
 156         abort();
 157     return ret;
 158 }
 159
 160 static void sfree(void *p)
 161 {
 162     free(p);
 163 }
 164
 165 #ifndef FALSE
 166 #define FALSE 0
 167 #endif
 168 #ifndef TRUE
 169 #define TRUE 1
 170 #endif
 171
 172 /* We could probably do with more compiler-specific branches of this #if. */
 173 #if defined(__GNUC__)
 174 #define INLINE __inline
 175 #else
 176 #define INLINE
 177 #endif
 178
 179 /* Hooks into the low-level code for test purposes. */
 180 #ifdef TEST
 181 void testlock(int write, int set, nodeptr n);
 182 #else
 183 #define testlock(w,s,n)
 184 #endif
 185
 186 /* ----------------------------------------------------------------------
 187  * Low-level helper routines, which understand the in-memory format
 188  * of a node and know how to read-lock and write-lock.
 189  */
 190
 191 /*
 192  * Read and write the node_addr of a child.
 193  */
 194 static INLINE node_addr bt_child(btree *bt, nodeptr n, int index)
 195 {
 196     return n[index].na;
 197 }
 198 static INLINE void bt_set_child(btree *bt, nodeptr n,
 199                                 int index, node_addr value)
 200 {
 201     n[index].na = value;
 202 }
 203
 204 /*
 205  * Read and write the address of an element.
 206  */
 207 static INLINE bt_element_t bt_element(btree *bt, nodeptr n, int index)
 208 {
 209     return n[bt->maxdegree + index].ep;
 210 }
 211 static INLINE void bt_set_element(btree *bt, nodeptr n,
 212                                   int index, bt_element_t value)
 213 {
 214     n[bt->maxdegree + index].ep = value;
 215 }
 216
 217 /*
 218  * Give the number of subtrees currently present in an element.
 219  */
 220 static INLINE int bt_subtrees(btree *bt, nodeptr n)
 221 {
 222     return n[bt->maxdegree*2-1].i;
 223 }
 224 #define bt_elements(bt,n) (bt_subtrees(bt,n) - 1)
 225
 226 /*
 227  * Give the minimum and maximum number of subtrees allowed in a
 228  * node.
 229  */
 230 static INLINE int bt_min_subtrees(btree *bt)
 231 {
 232     return bt->mindegree;
 233 }
 234 static INLINE int bt_max_subtrees(btree *bt)
 235 {
 236     return bt->maxdegree;
 237 }
 238
 239 /*
 240  * Return the count of items, and the user properties, in a
 241  * particular subtree of a node.
 242  *
 243  * Note that in the in-memory form of the tree, this breaks the
 244  * read-locking semantics, by reading the counts out of the child
 245  * nodes without bothering to lock them. We're allowed to do this
 246  * because this function is implemented at the same very low level
 247  * as the implementation of bt_read_lock(), so we're allowed to
 248  * know that read locking actually doesn't do anything.
 249  */
 250 static INLINE int bt_child_count(btree *bt, nodeptr n, int index)
 251 {
 252     if (n[index].na.p)
 253         return n[index].na.p[bt->maxdegree*2].i;
 254     else
 255         return 0;
 256 }
 257
 258 static INLINE void *bt_child_prop(btree *bt, nodeptr n, int index)
 259 {
 260     if (n[index].na.p)
 261         return (char *)n[index].na.p + bt->propoffset;
 262     else
 263         return NULL;
 264 }
 265
 266 /*
 267  * Return the count of items in a whole node.
 268  */
 269 static INLINE int bt_node_count(btree *bt, nodeptr n)
 270 {
 271     return n[bt->maxdegree*2].i;
 272 }
 273
 274 /*
 275  * Determine whether a node is a leaf node or not.
 276  */
 277 static INLINE int bt_is_leaf(btree *bt, nodeptr n)
 278 {
 279     return n[0].na.p == NULL;
 280 }
 281
 282 /*
 283  * Create a new write-locked node, and return a pointer to it.
 284  */
 285 static INLINE nodeptr bt_new_node(btree *bt, int nsubtrees)
 286 {
 287     nodeptr ret = (nodecomponent *)smalloc(bt->propoffset + bt->propsize);
 288     ret[bt->maxdegree*2-1].i = nsubtrees;
 289     ret[bt->maxdegree*2+1].i = 1;      /* reference count 1 */
 290 #ifdef TEST
 291     set_invalid_property(ret + bt->maxdegree * 2 + 2);
 292 #else
 293     memset((char *)ret + bt->propoffset, 0, bt->propsize);
 294 #endif
 295     testlock(TRUE, TRUE, ret);
 296     return ret;
 297 }
 298
 299 /*
 300  * Destroy a node (must be write-locked).
 301  */
 302 static INLINE void bt_destroy_node(btree *bt, nodeptr n)
 303 {
 304     testlock(TRUE, FALSE, n);
 305     /* Free the property. */
 306     bt->propmerge(bt->userstate, NULL, NULL, n + bt->maxdegree * 2 + 2);
 307     sfree(n);
 308 }
 309
 310 /*
 311  * Take an existing node and prepare to re-use it in a new context.
 312  */
 313 static INLINE nodeptr bt_reuse_node(btree *bt, nodeptr n, int nsubtrees)
 314 {
 315     testlock(TRUE, FALSE, n);
 316     testlock(TRUE, TRUE, n);
 317     n[bt->maxdegree*2-1].i = nsubtrees;
 318     return n;
 319 }
 320
 321 /*
 322  * Return an extra reference to a node, for purposes of cloning. So
 323  * we have to update its reference count as well.
 324  */
 325 static INLINE node_addr bt_ref_node(btree *bt, node_addr n)
 326 {
 327     if (n.p)
 328         n.p[bt->maxdegree*2+1].i++;
 329     return n;
 330 }
 331
 332 /*
 333  * Drop a node's reference count, for purposes of freeing. Returns
 334  * the new reference count. Typically this will be tested against
 335  * zero to see if the node needs to be physically freed; hence a
 336  * NULL node_addr causes a return of 1 (because this isn't
 337  * necessary).
 338  */
 339 static INLINE int bt_unref_node(btree *bt, node_addr n)
 340 {
 341     if (n.p) {
 342         n.p[bt->maxdegree*2+1].i--;
 343         return n.p[bt->maxdegree*2+1].i;
 344     } else
 345         return 1;                      /* a NULL node is considered OK */
 346 }
 347
 348 /*
 349  * Clone a node during write unlocking, if its reference count is
 350  * more than one.
 351  */
 352 static nodeptr bt_clone_node(btree *bt, nodeptr n)
 353 {
 354     int i;
 355     nodeptr ret = (nodecomponent *)smalloc(bt->propoffset + bt->propsize);
 356     memcpy(ret, n, (bt->maxdegree*2+1) * sizeof(nodecomponent));
 357     if (bt->copy) {
 358         for (i = 0; i < bt_elements(bt, ret); i++) {
 359             bt_element_t *e = bt_element(bt, ret, i);
 360             bt_set_element(bt, ret, i, bt->copy(bt->userstate, e));
 361         }
 362     }
 363     ret[bt->maxdegree*2+1].i = 1;      /* clone has reference count 1 */
 364     n[bt->maxdegree*2+1].i--;          /* drop original's ref count by one */
 365     /*
 366      * At this low level, we're allowed to reach directly into the
 367      * subtrees to fiddle with their reference counts without
 368      * having to lock them.
 369      */
 370     for (i = 0; i < bt_subtrees(bt, ret); i++) {
 371         node_addr na = bt_child(bt, ret, i);
 372         if (na.p)
 373             na.p[bt->maxdegree*2+1].i++;   /* inc ref count of each child */
 374     }
 375     /*
 376      * Copy the user property explicitly (in case it contains a
 377      * pointer to an allocated area).
 378      */
 379     memset((char *)ret + bt->propoffset, 0, bt->propsize);
 380     bt->propmerge(bt->userstate, NULL, n + bt->maxdegree * 2 + 2,
 381                   ret + bt->maxdegree * 2 + 2);
 382     return ret;
 383 }
 384
 385 /*
 386  * Return the node_addr for a currently locked node. NB that this
 387  * means node movement must take place during _locking_ rather than
 388  * unlocking!
 389  */
 390 static INLINE node_addr bt_node_addr(btree *bt, nodeptr n)
 391 {
 392     node_addr ret;
 393     ret.p = n;
 394     return ret;
 395 }
 396
 397 /*
 398  * The bt_write_lock and bt_read_lock functions should gracefully
 399  * handle being asked to write-lock a null node pointer, and just
 400  * return a null nodeptr.
 401  */
 402 static INLINE nodeptr bt_write_lock_child(btree *bt, nodeptr a, int index)
 403 {
 404     node_addr addr = bt_child(bt, a, index);
 405     if (addr.p && addr.p[bt->maxdegree*2+1].i > 1) {
 406         nodeptr clone = bt_clone_node(bt, addr.p);
 407         bt_set_child(bt, a, index, bt_node_addr(bt, clone));
 408         testlock(TRUE, TRUE, clone);
 409         return clone;
 410     }
 411     testlock(TRUE, TRUE, addr.p);
 412     return addr.p;
 413 }
 414 static INLINE nodeptr bt_write_lock_root(btree *bt)
 415 {
 416     node_addr addr = bt->root;
 417     if (addr.p && addr.p[bt->maxdegree*2+1].i > 1) {
 418         nodeptr clone = bt_clone_node(bt, addr.p);
 419         bt->root = bt_node_addr(bt, clone);
 420         testlock(TRUE, TRUE, clone);
 421         return clone;
 422     }
 423     testlock(TRUE, TRUE, addr.p);
 424     return addr.p;
 425 }
 426 static INLINE nodeptr bt_read_lock(btree *bt, node_addr a)
 427 {
 428     testlock(FALSE, TRUE, a.p);
 429     return a.p;
 430 }
 431 #define bt_read_lock_root(bt) (bt_read_lock(bt, (bt)->root))
 432 #define bt_read_lock_child(bt,a,index) (bt_read_lock(bt,bt_child(bt,a,index)))
 433
 434 static INLINE void bt_write_relock(btree *bt, nodeptr n, int props)
 435 {
 436     int i, ns, count;
 437
 438     /*
 439      * Update the count in the node.
 440      */
 441     ns = bt_subtrees(bt, n);
 442     count = ns-1;                      /* count the elements */
 443     for (i = 0; i < ns; i++)
 444         count += bt_child_count(bt, n, i);
 445     n[bt->maxdegree*2].i = count;
 446     testlock(TRUE, FALSE, n);
 447     testlock(TRUE, TRUE, n);
 448
 449     /*
 450      * Update user read properties.
 451      */
 452     if (props && bt->propsize) {
 453         void *prevprop, *eltprop, *thisprop, *childprop;
 454
 455         prevprop = NULL;
 456         eltprop = ialloc(bt->propsize);
 457         thisprop = (void *)((char *)n + bt->propoffset);
 458
 459         for (i = 0; i < ns; i++) {
 460             /* Merge a subtree's property into this one.
 461              * Initially prevprop==NULL, meaning to just copy. */
 462             if ( (childprop = bt_child_prop(bt, n, i)) != NULL ) {
 463                 bt->propmerge(bt->userstate, prevprop, childprop, thisprop);
 464                 prevprop = thisprop;
 465             }
 466
 467             if (i < ns-1) {
 468                 /* Now merge in the separating element. */
 469                 bt->propmake(bt->userstate, bt_element(bt, n, i), eltprop);
 470                 bt->propmerge(bt->userstate, prevprop, eltprop, thisprop);
 471                 prevprop = thisprop;
 472             }
 473         }
 474
 475         ifree(eltprop);
 476     }
 477 }
 478
 479 static INLINE node_addr bt_write_unlock_internal(btree *bt, nodeptr n,
 480                                                  int props)
 481 {
 482     node_addr ret;
 483
 484     bt_write_relock(bt, n, props);
 485
 486     testlock(TRUE, FALSE, n);
 487
 488     ret.p = n;
 489     return ret;
 490 }
 491
 492 static INLINE node_addr bt_write_unlock(btree *bt, nodeptr n)
 493 {
 494     return bt_write_unlock_internal(bt, n, TRUE);
 495 }
 496
 497 static INLINE void bt_read_unlock(btree *bt, nodeptr n)
 498 {
 499     /*
 500      * For trees in memory, we do nothing here, except run some
 501      * optional testing.
 502      */
 503     testlock(FALSE, FALSE, n);
 504 }
 505
 506 /* ----------------------------------------------------------------------
 507  * Higher-level helper functions, which should be independent of
 508  * the knowledge of precise node structure in the above code.
 509  */
 510
 511 /*
 512  * Return the count of items below a node that appear before the
 513  * start of a given subtree.
 514  */
 515 static int bt_child_startpos(btree *bt, nodeptr n, int index)
 516 {
 517     int pos = 0;
 518
 519     while (index > 0) {
 520         index--;
 521         pos += bt_child_count(bt, n, index) + 1;   /* 1 for separating elt */
 522     }
 523     return pos;
 524 }
 525
 526 /*
 527  * Create a new root node for a tree.
 528  */
 529 static void bt_new_root(btree *bt, node_addr left, node_addr right,
 530                         bt_element_t element)
 531 {
 532     nodeptr n;
 533     n = bt_new_node(bt, 2);
 534     bt_set_child(bt, n, 0, left);
 535     bt_set_child(bt, n, 1, right);
 536     bt_set_element(bt, n, 0, element);
 537     bt->root = bt_write_unlock(bt, n);
 538     bt->depth++;
 539 }
 540
 541 /*
 542  * Discard the root node of a tree, and enshrine a new node as the
 543  * root. Expects to be passed a write-locked nodeptr to the old
 544  * root.
 545  */
 546 static void bt_shift_root(btree *bt, nodeptr n, node_addr na)
 547 {
 548     bt_destroy_node(bt, n);
 549     bt->root = na;
 550     bt->depth--;
 551 }
 552
 553 /*
 554  * Given a numeric index within a node, find which subtree we would
 555  * descend to in order to find that index.
 556  *
 557  * Updates `pos' to give the numeric index within the subtree
 558  * found. Also returns `ends' (if non-NULL), which has bit 0 set if
 559  * the index is at the very left edge of the subtree, and/or bit 1
 560  * if it's at the very right edge.
 561  *
 562  * Return value is the number of the subtree (0 upwards).
 563  */
 564 #define ENDS_NONE  0
 565 #define ENDS_LEFT  1
 566 #define ENDS_RIGHT 2
 567 #define ENDS_BOTH  3
 568 static int bt_lookup_pos(btree *bt, nodeptr n, int *pos, int *ends)
 569 {
 570     int child = 0;
 571     int nchildren = bt_subtrees(bt, n);
 572
 573     while (child < nchildren) {
 574         int count = bt_child_count(bt, n, child);
 575         if (*pos <= count) {
 576             if (ends) {
 577                 *ends = 0;
 578                 if (*pos == count)
 579                     *ends |= ENDS_RIGHT;
 580                 if (*pos == 0)
 581                     *ends |= ENDS_LEFT;
 582             }
 583             return child;
 584         }
 585         *pos -= count + 1;             /* 1 for the separating element */
 586         child++;
 587     }
 588     return -1;                         /* ran off the end; shouldn't happen */
 589 }
 590
 591 /*
 592  * Given an element to search for within a node, find either the
 593  * element, or which subtree we would descend to to continue
 594  * searching for that element.
 595  *
 596  * Return value is either the index of the element, or the index of
 597  * the subtree (both 0 upwards). `is_elt' returns FALSE or TRUE
 598  * respectively.
 599  *
 600  * Since this may be used by bt_find() with an alternative cmpfn_t,
 601  * we always pass the input element as the first argument to cmp.
 602  */
 603 static int bt_lookup_cmp(btree *bt, nodeptr n, bt_element_t element,
 604                          cmpfn_t cmp, int *is_elt)
 605 {
 606     int mintree = 0, maxtree = bt_subtrees(bt, n)-1;
 607
 608     while (mintree < maxtree) {
 609         int elt = (maxtree + mintree) / 2;
 610         int c = cmp(bt->userstate, element, bt_element(bt, n, elt));
 611
 612         if (c == 0) {
 613             *is_elt = TRUE;
 614             return elt;
 615         } else if (c < 0) {
 616             /*
 617              * `element' is less than element `elt'. So it can be
 618              * in subtree number `elt' at the highest.
 619              */
 620             maxtree = elt;
 621         } else { /* c > 0 */
 622             /*
 623              * `element' is greater than element `elt'. So it can
 624              * be in subtree number (elt+1) at the lowest.
 625              */
 626             mintree = elt+1;
 627         }
 628     }
 629
 630     /*
 631      * If we reach here without returning, we must have narrowed
 632      * our search to the point where mintree = maxtree. So the
 633      * element is not in the node itself and we know which subtree
 634      * to search next.
 635      */
 636     assert(mintree == maxtree);
 637     *is_elt = FALSE;
 638     return mintree;
 639 }
 640
 641 /*
 642  * Generic transformations on B-tree nodes.
 643  *
 644  * This function divides essentially into an input side and an
 645  * output side. The input side accumulates a list of items
 646  * node,element,node,element,...,element,node; the output side
 647  * writes those items into either one or two nodes.
 648  *
 649  * `intype' can be:
 650  *
 651  *  - NODE_AS_IS. The input list is the contents of in1, followed
 652  *    by inelt, followed by the contents of in2. The `extra'
 653  *    parameters are unused, as is `inaux'.
 654  *
 655  *  - NODE_ADD_ELT. `in2' is unused. The input list is the contents
 656  *    of `in1', but with subtree pointer number `inaux' replaced by
 657  *    extra1/inelt/extra2.
 658  *
 659  *  - NODE_DEL_ELT. `in2' and `inelt' are unused, as is `extra2'.
 660  *    The input list is the contents of `in1', but with element
 661  *    pointer number `inaux' and its surrounding two subtrees
 662  *    replaced by extra1.
 663  *
 664  * Having obtained the input list, it is then written to one or two
 665  * output nodes. If `splitpos' is NODE_JOIN, everything is written
 666  * into one output node `out1'. Otherwise, `splitpos' is treated as
 667  * an element index within the input list; that element is returned
 668  * in `outelt', and the contents of the list is divided there and
 669  * returned in nodes `out1' and `out2'.
 670  *
 671  * This function will re-use nodes in the `obvious' order. If two
 672  * nodes are passed in and two nodes are output, they'll be the
 673  * same nodes; if one node is passed in and one node output, it
 674  * will be the same node too. If two are passed in and only one
 675  * output, the first one will be used and the second destroyed; if
 676  * one node is passed in and two are output, the one passed in will
 677  * be the first of those returned, and the second will be new.
 678  */
 679 #define NODE_AS_IS 1
 680 #define NODE_ADD_ELT 2
 681 #define NODE_DEL_ELT 3
 682 #define NODE_JOIN -1
 683 static void bt_xform(btree *bt, int intype, int inaux,
 684                      nodeptr in1, nodeptr in2, bt_element_t inelt,
 685                      node_addr extra1, node_addr extra2,
 686                      int splitpos, nodeptr *out1, nodeptr *out2,
 687                      bt_element_t *outelt)
 688 {
 689     node_addr *nodes;
 690     bt_element_t *elements;
 691     nodeptr ret1, ret2;
 692     int n1, n2, off2, i, j;
 693
 694     nodes = inewn(node_addr, 2 * bt_max_subtrees(bt));
 695     elements = inewn(bt_element_t, 2 * bt_max_subtrees(bt));
 696
 697     /*
 698      * Accumulate the input list.
 699      */
 700     switch(intype) {
 701       case NODE_AS_IS:
 702         n1 = bt_subtrees(bt, in1);
 703         n2 = bt_subtrees(bt, in2);
 704         off2 = 0;
 705         break;
 706       case NODE_ADD_ELT:
 707         in2 = in1;
 708         n1 = inaux+1;
 709         n2 = bt_subtrees(bt, in1) - inaux;
 710         off2 = inaux;
 711         break;
 712       case NODE_DEL_ELT:
 713         in2 = in1;
 714         n1 = inaux+1;
 715         n2 = bt_subtrees(bt, in1) - inaux - 1;
 716         off2 = inaux+1;
 717         break;
 718     }
 719     i = j = 0;
 720     while (j < n1) {
 721         nodes[i] = bt_child(bt, in1, j);
 722         if (j+1 < n1)
 723             elements[i] = bt_element(bt, in1, j);
 724         i++, j++;
 725     }
 726     if (intype == NODE_DEL_ELT) {
 727         i--;
 728     }
 729     j = 0;
 730     while (j < n2) {
 731         nodes[i] = bt_child(bt, in2, off2+j);
 732         if (j+1 < n2)
 733             elements[i] = bt_element(bt, in2, off2+j);
 734         i++, j++;
 735     }
 736     switch (intype) {
 737       case NODE_AS_IS:
 738         elements[n1-1] = inelt;
 739         break;
 740       case NODE_ADD_ELT:
 741         nodes[n1-1] = extra1;
 742         nodes[n1] = extra2;
 743         elements[n1-1] = inelt;
 744         break;
 745       case NODE_DEL_ELT:
 746         nodes[n1-1] = extra1;
 747         break;
 748     }
 749
 750     /*
 751      * Now determine how many subtrees go in each output node, and
 752      * actually create the nodes to be returned.
 753      */
 754     if (splitpos != NODE_JOIN) {
 755         n1 = splitpos+1, n2 = i - splitpos - 1;
 756         if (outelt)
 757             *outelt = elements[splitpos];
 758     } else {
 759         n1 = i, n2 = 0;
 760     }
 761
 762     ret1 = bt_reuse_node(bt, in1, n1);
 763     if (intype == NODE_AS_IS && in2) {
 764         /* We have a second input node. */
 765         if (n2)
 766             ret2 = bt_reuse_node(bt, in2, n2);
 767         else
 768             bt_destroy_node(bt, in2);
 769     } else {
 770         /* We have no second input node. */
 771         if (n2)
 772             ret2 = bt_new_node(bt, n2);
 773         else
 774             ret2 = NULL;
 775     }
 776
 777     if (out1) *out1 = ret1;
 778     if (out2) *out2 = ret2;
 779
 780     for (i = 0; i < n1; i++) {
 781         bt_set_child(bt, ret1, i, nodes[i]);
 782         if (i+1 < n1)
 783             bt_set_element(bt, ret1, i, elements[i]);
 784     }
 785     if (n2) {
 786         if (outelt) *outelt = elements[n1-1];
 787         for (i = 0; i < n2; i++) {
 788             bt_set_child(bt, ret2, i, nodes[n1+i]);
 789             if (i+1 < n2)
 790                 bt_set_element(bt, ret2, i, elements[n1+i]);
 791         }
 792     }
 793
 794     ifree(nodes);
 795     ifree(elements);
 796 }
 797
 798 /*
 799  * Fiddly little compare functions for use in special cases of
 800  * findrelpos. One always returns +1 (a > b), the other always
 801  * returns -1 (a < b).
 802  */
 803 static int bt_cmp_greater(void *state,
 804                           const bt_element_t a, const bt_element_t b)
 805 {
 806     return +1;
 807 }
 808 static int bt_cmp_less(void *state,
 809                        const bt_element_t a, const bt_element_t b)
 810 {
 811     return -1;
 812 }
 813
 814 /* ----------------------------------------------------------------------
 815  * User-visible administration routines.
 816  */
 817
 818 btree *bt_new(cmpfn_t cmp, copyfn_t copy, freefn_t freeelt,
 819               int propsize, int propalign, propmakefn_t propmake,
 820               propmergefn_t propmerge, void *state, int mindegree)
 821 {
 822     btree *ret;
 823
 824     ret = new1(btree);
 825     ret->mindegree = mindegree;
 826     ret->maxdegree = 2*mindegree;
 827     ret->depth = 0;                    /* not even a root right now */
 828     ret->root = NODE_ADDR_NULL;
 829     ret->cmp = cmp;
 830     ret->copy = copy;
 831     ret->freeelt = freeelt;
 832     ret->propsize = propsize;
 833     ret->propalign = propalign;
 834     ret->propoffset = sizeof(nodecomponent) * (ret->maxdegree*2 + 2);
 835     if (propalign > 0) {
 836         ret->propoffset += propalign - 1;
 837         ret->propoffset -= ret->propoffset % propalign;
 838     }
 839     ret->propmake = propmake;
 840     ret->propmerge = propmerge;
 841     ret->userstate = state;
 842
 843     return ret;
 844 }
 845
 846 static void bt_free_node(btree *bt, nodeptr n)
 847 {
 848     int i;
 849
 850     for (i = 0; i < bt_subtrees(bt, n); i++) {
 851         node_addr na;
 852         nodeptr n2;
 853
 854         na = bt_child(bt, n, i);
 855         if (!bt_unref_node(bt, na)) {
 856             n2 = bt_write_lock_child(bt, n, i);
 857             bt_free_node(bt, n2);
 858         }
 859     }
 860
 861     if (bt->freeelt) {
 862         for (i = 0; i < bt_subtrees(bt, n)-1; i++)
 863             bt->freeelt(bt->userstate, bt_element(bt, n, i));
 864     }
 865
 866     bt_destroy_node(bt, n);
 867 }
 868
 869 void bt_free(btree *bt)
 870 {
 871     nodeptr n;
 872
 873     if (!bt_unref_node(bt, bt->root)) {
 874         n = bt_write_lock_root(bt);
 875         bt_free_node(bt, n);
 876     }
 877
 878     sfree(bt);
 879 }
 880
 881 btree *bt_clone(btree *bt)
 882 {
 883     btree *bt2;
 884
 885     bt2 = bt_new(bt->cmp, bt->copy, bt->freeelt, bt->propsize, bt->propalign,
 886                  bt->propmake, bt->propmerge, bt->userstate, bt->mindegree);
 887     bt2->depth = bt->depth;
 888     bt2->root = bt_ref_node(bt, bt->root);
 889     return bt2;
 890 }
 891
 892 /*
 893  * Nice simple function to count the size of a tree.
 894  */
 895 int bt_count(btree *bt)
 896 {
 897     int count;
 898     nodeptr n;
 899
 900     n = bt_read_lock_root(bt);
 901     if (n) {
 902         count = bt_node_count(bt, n);
 903         bt_read_unlock(bt, n);
 904         return count;
 905     } else {
 906         return 0;
 907     }
 908 }
 909
 910 /* ----------------------------------------------------------------------
 911  * Actual B-tree algorithms.
 912  */
 913
 914 /*
 915  * Find an element by numeric index. bt_index_w is the same, but
 916  * works with write locks instead of read locks, so it guarantees
 917  * to return an element with only one reference to it. (You'd use
 918  * this if you were using tree cloning, and wanted to modify the
 919  * element once you'd found it.)
 920  */
 921 bt_element_t bt_index(btree *bt, int index)
 922 {
 923     nodeptr n, n2;
 924     int child, ends;
 925
 926     n = bt_read_lock_root(bt);
 927
 928     if (index < 0 || index >= bt_node_count(bt, n)) {
 929         bt_read_unlock(bt, n);
 930         return NULL;
 931     }
 932
 933     while (1) {
 934         child = bt_lookup_pos(bt, n, &index, &ends);
 935         if (ends & ENDS_RIGHT) {
 936             bt_element_t ret = bt_element(bt, n, child);
 937             bt_read_unlock(bt, n);
 938             return ret;
 939         }
 940         n2 = bt_read_lock_child(bt, n, child);
 941         bt_read_unlock(bt, n);
 942         n = n2;
 943         assert(n != NULL);
 944     }
 945 }
 946
 947 bt_element_t bt_index_w(btree *bt, int index)
 948 {
 949     nodeptr n, n2;
 950     int nnodes, child, ends;
 951     nodeptr *nodes;
 952     bt_element_t ret;
 953
 954     nodes = inewn(nodeptr, bt->depth+1);
 955     nnodes = 0;
 956
 957     n = bt_write_lock_root(bt);
 958
 959     if (index < 0 || index >= bt_node_count(bt, n)) {
 960         bt_write_unlock(bt, n);
 961         return NULL;
 962     }
 963
 964     while (1) {
 965         nodes[nnodes++] = n;
 966         child = bt_lookup_pos(bt, n, &index, &ends);
 967         if (ends & ENDS_RIGHT) {
 968             ret = bt_element(bt, n, child);
 969             break;
 970         }
 971         n2 = bt_write_lock_child(bt, n, child);
 972         n = n2;
 973         assert(n != NULL);
 974     }
 975
 976     while (nnodes-- > 0)
 977         bt_write_unlock(bt, nodes[nnodes]);
 978
 979     return ret;
 980 }
 981
 982 /*
 983  * Search for an element by sorted order.
 984  */
 985 bt_element_t bt_findrelpos(btree *bt, bt_element_t element, cmpfn_t cmp,
 986                            int relation, int *index)
 987 {
 988     nodeptr n, n2;
 989     int child, is_elt;
 990     bt_element_t gotit;
 991     int pos = 0;
 992     int count;
 993
 994     if (!cmp) cmp = bt->cmp;
 995
 996     /*
 997      * Special case: relation LT/GT and element NULL means get an
 998      * extreme element of the tree. We do this by fudging the
 999      * compare function so that our NULL element will be considered
1000      * infinitely large or infinitely small.
1001      */
1002     if (element == NULL) {
1003         assert(relation == BT_REL_LT || relation == BT_REL_GT);
1004         if (relation == BT_REL_LT)
1005             cmp = bt_cmp_greater;      /* always returns a > b */
1006         else
1007             cmp = bt_cmp_less;         /* always returns a < b */
1008     }
1009
1010     gotit = NULL;
1011     n = bt_read_lock_root(bt);
1012     if (!n)
1013         return NULL;
1014     count = bt_node_count(bt, n);
1015     while (n) {
1016         child = bt_lookup_cmp(bt, n, element, cmp, &is_elt);
1017         if (is_elt) {
1018             pos += bt_child_startpos(bt, n, child+1) - 1;
1019             gotit = bt_element(bt, n, child);
1020             bt_read_unlock(bt, n);
1021             break;
1022         } else {
1023             pos += bt_child_startpos(bt, n, child);
1024             n2 = bt_read_lock_child(bt, n, child);
1025             bt_read_unlock(bt, n);
1026             n = n2;
1027         }
1028     }
1029
1030     /*
1031      * Now all nodes are unlocked, and we are _either_ (a) holding
1032      * an element in `gotit' whose index we have in `pos', _or_ (b)
1033      * holding nothing in `gotit' but we know the index of the
1034      * next-higher element.
1035      */
1036     if (gotit) {
1037         /*
1038          * We have the real element. For EQ, LE and GE relations we
1039          * can now just return it; otherwise we must return the
1040          * next element down or up.
1041          */
1042         if (relation == BT_REL_LT)
1043             gotit = bt_index(bt, --pos);
1044         else if (relation == BT_REL_GT)
1045             gotit = bt_index(bt, ++pos);
1046     } else {
1047         /*
1048          * We don't have the real element. For EQ relation we now
1049          * just give up; for everything else we return the next
1050          * element down or up.
1051          */
1052         if (relation == BT_REL_LT || relation == BT_REL_LE)
1053             gotit = bt_index(bt, --pos);
1054         else if (relation == BT_REL_GT || relation == BT_REL_GE)
1055             gotit = bt_index(bt, pos);
1056     }
1057
1058     if (gotit && index) *index = pos;
1059     return gotit;
1060 }
1061 bt_element_t bt_findrel(btree *bt, bt_element_t element, cmpfn_t cmp,
1062                         int relation)
1063 {
1064     return bt_findrelpos(bt, element, cmp, relation, NULL);
1065 }
1066 bt_element_t bt_findpos(btree *bt, bt_element_t element, cmpfn_t cmp,
1067                         int *index)
1068 {
1069     return bt_findrelpos(bt, element, cmp, BT_REL_EQ, index);
1070 }
1071 bt_element_t bt_find(btree *bt, bt_element_t element, cmpfn_t cmp)
1072 {
1073     return bt_findrelpos(bt, element, cmp, BT_REL_EQ, NULL);
1074 }
1075
1076 /*
1077  * Find an element by property-based search. Returns the element
1078  * (if one is selected - the search can also terminate by
1079  * descending to a nonexistent subtree of a leaf node, equivalent
1080  * to selecting the _gap_ between two elements); also returns the
1081  * index of either the element or the gap in `*index' if `index' is
1082  * non-NULL.
1083  */
1084 bt_element_t bt_propfind(btree *bt, searchfn_t search, void *sstate,
1085                          int *index)
1086 {
1087     nodeptr n, n2;
1088     int i, j, count, is_elt;
1089     void **props;
1090     int *counts;
1091     bt_element_t *elts;
1092     bt_element_t *e = NULL;
1093
1094     props = inewn(void *, bt->maxdegree);
1095     counts = inewn(int, bt->maxdegree);
1096     elts = inewn(bt_element_t, bt->maxdegree);
1097
1098     n = bt_read_lock_root(bt);
1099
1100     count = 0;
1101
1102     while (n) {
1103         int ntrees = bt_subtrees(bt, n);
1104
1105         /*
1106          * Prepare the arguments to the search function.
1107          */
1108         for (i = 0; i < ntrees; i++) {
1109             props[i] = bt_child_prop(bt, n, i);
1110             counts[i] = bt_child_count(bt, n, i);
1111             if (i < ntrees-1)
1112                 elts[i] = bt_element(bt, n, i);
1113         }
1114
1115         /*
1116          * Call the search function.
1117          */
1118         i = search(bt->userstate, sstate, ntrees,
1119                    props, counts, elts, &is_elt);
1120
1121         if (!is_elt) {
1122             /*
1123              * Descend to subtree i. Update `count' to consider
1124              * everything (both subtrees and elements) before that
1125              * subtree.
1126              */
1127             for (j = 0; j < i; j++)
1128                 count += 1 + bt_child_count(bt, n, j);
1129             n2 = bt_read_lock_child(bt, n, i);
1130             bt_read_unlock(bt, n);
1131             n = n2;
1132         } else {
1133             /*
1134              * Return element i. Update `count' to consider
1135              * everything (both subtrees and elements) before that
1136              * element.
1137              */
1138             for (j = 0; j <= i; j++)
1139                 count += 1 + bt_child_count(bt, n, j);
1140             count--;                   /* don't count element i itself */
1141             e = bt_element(bt, n, i);
1142             bt_read_unlock(bt, n);
1143             break;
1144         }
1145     }
1146
1147     ifree(props);
1148     ifree(counts);
1149     ifree(elts);
1150
1151     if (index) *index = count;
1152     return e;
1153 }
1154
1155 /*
1156  * Replace the element at a numeric index by a new element. Returns
1157  * the old element.
1158  *
1159  * Can also be used when the new element is the _same_ as the old
1160  * element, but has changed in some way that will affect user
1161  * properties.
1162  */
1163 bt_element_t bt_replace(btree *bt, bt_element_t element, int index)
1164 {
1165     nodeptr n;
1166     nodeptr *nodes;
1167     bt_element_t ret;
1168     int nnodes, child, ends;
1169
1170     nodes = inewn(nodeptr, bt->depth+1);
1171     nnodes = 0;
1172
1173     n = bt_write_lock_root(bt);
1174
1175     if (index < 0 || index >= bt_node_count(bt, n)) {
1176         bt_write_unlock(bt, n);
1177         return NULL;
1178     }
1179
1180     while (1) {
1181         nodes[nnodes++] = n;
1182         child = bt_lookup_pos(bt, n, &index, &ends);
1183         if (ends & ENDS_RIGHT) {
1184             ret = bt_element(bt, n, child);
1185             bt_set_element(bt, n, child, element);
1186             break;
1187         }
1188         n = bt_write_lock_child(bt, n, child);
1189         assert(n != NULL);
1190     }
1191
1192     while (nnodes-- > 0)
1193         bt_write_unlock(bt, nodes[nnodes]);
1194
1195     return ret;
1196 }
1197
1198 /*
1199  * Add at a specific position. As we search down the tree we must
1200  * write-lock every node we meet, since otherwise we might fail to
1201  * clone nodes that will end up pointing to different things.
1202  */
1203 void bt_addpos(btree *bt, bt_element_t element, int pos)
1204 {
1205     nodeptr n;
1206     node_addr left, right, single;
1207     nodeptr *nodes;
1208     int *childposns;
1209     int nnodes, child;
1210
1211     /*
1212      * Since in a reference-counted tree we can't have parent
1213      * links, we will have to use O(depth) space to store the list
1214      * of nodeptrs we have gone through, so we can un-write-lock
1215      * them when we've finished. We also store the subtree index we
1216      * descended to at each stage.
1217      */
1218     nodes = inewn(nodeptr, bt->depth+1);
1219     childposns = inewn(int, bt->depth+1);
1220     nnodes = 0;
1221
1222     n = bt_write_lock_root(bt);
1223
1224     assert(pos >= 0 && pos <= (n ? bt_node_count(bt, n) : 0));
1225
1226     /*
1227      * Scan down the tree, write-locking nodes, until we find the
1228      * empty subtree where we want to insert the item.
1229      */
1230     while (n) {
1231         nodes[nnodes] = n;
1232         child = bt_lookup_pos(bt, n, &pos, NULL);
1233         childposns[nnodes] = child;
1234         nnodes++;
1235         n = bt_write_lock_child(bt, n, child);
1236     }
1237
1238     left = right = NODE_ADDR_NULL;
1239
1240     /*
1241      * Now nodes[nnodes-1] wants to have subtree index
1242      * childposns[nnodes-1] replaced by the node/element/node triple
1243      * (left,element,right). Propagate this up the tree until we
1244      * can stop.
1245      */
1246     while (nnodes-- > 0) {
1247         n = nodes[nnodes];
1248         if (bt_subtrees(bt, n) == bt_max_subtrees(bt)) {
1249             nodeptr lptr, rptr;
1250             /* Split the node and carry on up. */
1251             bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1252                      n, NULL, element, left, right,
1253                      bt_min_subtrees(bt), &lptr, &rptr, &element);
1254             left = bt_write_unlock(bt, lptr);
1255             right = bt_write_unlock(bt, rptr);
1256         } else {
1257             bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1258                      n, NULL, element, left, right,
1259                      NODE_JOIN, &n, NULL, NULL);
1260             single = bt_write_unlock(bt, n);
1261             break;
1262         }
1263     }
1264
1265     /*
1266      * If nnodes < 0, we have just split the root and we need to
1267      * build a new root node.
1268      */
1269     if (nnodes < 0) {
1270         bt_new_root(bt, left, right, element);
1271     } else {
1272         /*
1273          * Now nodes[nnodes-1] just wants to have child pointer
1274          * child[nnodes-1] replaced by `single', in case the
1275          * subtree was moved. Propagate this back up to the root,
1276          * unlocking all nodes.
1277          */
1278         while (nnodes-- > 0) {
1279             bt_set_child(bt, nodes[nnodes], childposns[nnodes], single);
1280             single = bt_write_unlock(bt, nodes[nnodes]);
1281         }
1282     }
1283
1284     ifree(nodes);
1285     ifree(childposns);
1286 }
1287
1288 /*
1289  * Add an element in sorted order. This is a wrapper on bt_addpos()
1290  * which finds the numeric index to add the item at and then calls
1291  * addpos. This isn't an optimal use of time, but it saves space by
1292  * avoiding starting to clone multiply-linked nodes until it's
1293  * known that the item _can_ be added to the tree (and isn't
1294  * duplicated in it already).
1295  */
1296 bt_element_t bt_add(btree *bt, bt_element_t element)
1297 {
1298     nodeptr n, n2;
1299     int child, is_elt;
1300     int pos = 0;
1301
1302     n = bt_read_lock_root(bt);
1303     while (n) {
1304         child = bt_lookup_cmp(bt, n, element, bt->cmp, &is_elt);
1305         if (is_elt) {
1306             bt_read_unlock(bt, n);
1307             return bt_element(bt, n, child);   /* element exists already */
1308         } else {
1309             pos += bt_child_startpos(bt, n, child);
1310             n2 = bt_read_lock_child(bt, n, child);
1311             bt_read_unlock(bt, n);
1312             n = n2;
1313         }
1314     }
1315     bt_addpos(bt, element, pos);
1316     return element;
1317 }
1318
1319 /*
1320  * Delete an element given its numeric position. Returns the
1321  * element deleted.
1322  */
1323 bt_element_t bt_delpos(btree *bt, int pos)
1324 {
1325     nodeptr n, c, c2, saved_n;
1326     nodeptr *nodes;
1327     int nnodes, child, nroot, pos2, ends, st, splitpoint, saved_pos;
1328     bt_element_t e, ret;
1329
1330     /*
1331      * Just like in bt_add, we store the set of nodeptrs we
1332      * write-locked on the way down, so we can unlock them on the
1333      * way back up.
1334      */
1335     nodes = inewn(nodeptr, bt->depth+1);
1336     nnodes = 0;
1337
1338     n = bt_write_lock_root(bt);
1339     nroot = TRUE;
1340     saved_n = NULL;
1341
1342     if (!n || pos < 0 || pos >= bt_node_count(bt, n)) {
1343         if (n)
1344             bt_write_unlock(bt, n);
1345         return NULL;
1346     }
1347
1348     while (1) {
1349         nodes[nnodes++] = n;
1350
1351         /*
1352          * Find out which subtree to descend to.
1353          */
1354         pos2 = pos;
1355         child = bt_lookup_pos(bt, n, &pos, &ends);
1356         c = bt_write_lock_child(bt, n, child);
1357         if (c && bt_subtrees(bt, c) == bt_min_subtrees(bt)) {
1358             /*
1359              * We're trying to descend to a subtree that's of
1360              * minimum size. Do something!
1361              */
1362             if (child > 0) {
1363                 /*
1364                  * Either move a subtree from the left sibling, or
1365                  * merge with it. (Traditionally we would only
1366                  * merge if we can't move a subtree from _either_
1367                  * sibling, but this way avoids too many extra
1368                  * write locks.)
1369                  */
1370                 c2 = c;
1371                 c = bt_write_lock_child(bt, n, child-1);
1372                 e = bt_element(bt, n, child-1);
1373                 st = bt_subtrees(bt, c);
1374                 if (st > bt_min_subtrees(bt))
1375                     splitpoint = st - 2;
1376                 else
1377                     splitpoint = NODE_JOIN;
1378                 child--;
1379             } else {
1380                 /*
1381                  * Likewise on the right-hand side.
1382                  */
1383                 c2 = bt_write_lock_child(bt, n, child+1);
1384                 e = bt_element(bt, n, child);
1385                 st = bt_subtrees(bt, c2);
1386                 if (st > bt_min_subtrees(bt))
1387                     splitpoint = bt_min_subtrees(bt);
1388                 else
1389                     splitpoint = NODE_JOIN;
1390             }
1391
1392             if (splitpoint == NODE_JOIN) {
1393                 /*
1394                  * So if we're merging nodes, go to it...
1395                  */
1396                 bt_xform(bt, NODE_AS_IS, 0,
1397                          c, c2, e, NODE_ADDR_NULL, NODE_ADDR_NULL,
1398                          NODE_JOIN, &c, NULL, NULL);
1399                 bt_xform(bt, NODE_DEL_ELT, child,
1400                          n, NULL, NULL, bt_node_addr(bt, c), NODE_ADDR_NULL,
1401                          NODE_JOIN, &n, NULL, NULL);
1402                 if (nroot && bt_subtrees(bt, n) == 1) {
1403                     /*
1404                      * Whoops, we just merged the last two children
1405                      * of the root. Better relocate the root.
1406                      */
1407                     bt_shift_root(bt, n, bt_node_addr(bt, c));
1408                     nnodes--;          /* don't leave it in nodes[]! */
1409                     n = NULL;
1410                     bt_write_relock(bt, c, TRUE);
1411                 } else
1412                     bt_write_unlock(bt, c);
1413             } else {
1414                 /*
1415                  * Or if we're redistributing subtrees, go to that.
1416                  */
1417                 bt_xform(bt, NODE_AS_IS, 0,
1418                          c, c2, e, NODE_ADDR_NULL, NODE_ADDR_NULL,
1419                          splitpoint, &c, &c2, &e);
1420                 bt_set_element(bt, n, child, e);
1421                 bt_write_unlock(bt, c);
1422                 bt_write_unlock(bt, c2);
1423             }
1424
1425             if (n) {
1426                 /* Recompute the counts in n so we can do lookups again. */
1427                 bt_write_relock(bt, n, TRUE);
1428
1429                 /* Having done the transform, redo the position lookup. */
1430                 pos = pos2;
1431                 child = bt_lookup_pos(bt, n, &pos, &ends);
1432                 c = bt_write_lock_child(bt, n, child);
1433             } else {
1434                 pos = pos2;
1435             }
1436         }
1437
1438         /*
1439          * Now see if this node contains the element we're
1440          * looking for.
1441          */
1442         if (n && (ends & ENDS_RIGHT)) {
1443             /*
1444              * It does. Element number `child' is the element we
1445              * want to delete. See if this is a leaf node...
1446              */
1447             if (!bt_is_leaf(bt, n)) {
1448                 /*
1449                  * It's not a leaf node. So we save the nodeptr and
1450                  * element index for later reference, and decrement
1451                  * `pos' so that we're searching for the element to its
1452                  * left, which _will_ be in a leaf node.
1453                  */
1454                 saved_n = n;
1455                 saved_pos = child;
1456                 pos--;
1457             } else {
1458                 /*
1459                  * We've reached a leaf node. Check to see if an
1460                  * internal-node position was stored in saved_n and
1461                  * saved_pos, and move this element there if so.
1462                  */
1463                 if (saved_n) {
1464                     ret = bt_element(bt, saved_n, saved_pos);
1465                     bt_set_element(bt, saved_n, saved_pos,
1466                                    bt_element(bt, n, child));
1467                 } else {
1468                     ret = bt_element(bt, n, child);
1469                 }
1470                 /* Then delete it from the leaf node. */
1471                 bt_xform(bt, NODE_DEL_ELT, child,
1472                          n, NULL, NULL, NODE_ADDR_NULL, NODE_ADDR_NULL,
1473                          NODE_JOIN, &n, NULL, NULL);
1474                 /*
1475                  * Final special case: if this is the root node and
1476                  * we've just deleted its last element, we should
1477                  * destroy it and leave a completely empty tree.
1478                  */
1479                 if (nroot && bt_subtrees(bt, n) == 1) {
1480                     bt_shift_root(bt, n, NODE_ADDR_NULL);
1481                     nnodes--;          /* and take it out of nodes[] */
1482                 }
1483                 /* Now we're done */
1484                 break;
1485             }
1486         }
1487
1488         /* Descend to the child and go round again. */
1489         n = c;
1490         nroot = FALSE;
1491     }
1492
1493     /*
1494      * All done. Zip back up the tree un-write-locking nodes.
1495      */
1496     while (nnodes-- > 0)
1497         bt_write_unlock(bt, nodes[nnodes]);
1498
1499     ifree(nodes);
1500
1501     return ret;
1502 }
1503
1504 /*
1505  * Delete an element in sorted order.
1506  */
1507 bt_element_t bt_del(btree *bt, bt_element_t element)
1508 {
1509     int index;
1510     if (!bt_findrelpos(bt, element, NULL, BT_REL_EQ, &index))
1511         return NULL;                   /* wasn't there */
1512     return bt_delpos(bt, index);
1513 }
1514
1515 /*
1516  * Join two trees together, given their respective depths and a
1517  * middle element. Puts the resulting tree in the root of `bt'.
1518  *
1519  * This internal routine assumes that the trees have the same
1520  * degree.
1521  *
1522  * The input nodeptrs are assumed to be write-locked, but none of
1523  * their children are yet write-locked.
1524  */
1525 static void bt_join_internal(btree *bt, nodeptr lp, nodeptr rp,
1526                             bt_element_t sep, int ld, int rd)
1527 {
1528     nodeptr *nodes;
1529     int *childposns;
1530     int nnodes, nodessize;
1531     int lsub, rsub;
1532
1533     /*
1534      * We will need to store parent nodes up to the difference
1535      * between ld and rd.
1536      */
1537     nodessize = (ld < rd ? rd-ld : ld-rd);
1538     if (nodessize) {                   /* we may not need _any_! */
1539         nodes = inewn(nodeptr, nodessize);
1540         childposns = inewn(int, nodessize);
1541     }
1542     nnodes = 0;
1543
1544     if (ld > rd) {
1545         bt->root = bt_node_addr(bt, lp);
1546         bt->depth = ld;
1547         /* If the left tree is taller, search down its right-hand edge. */
1548         while (ld > rd) {
1549             int child = bt_subtrees(bt, lp) - 1;
1550             nodeptr n = bt_write_lock_child(bt, lp, child);
1551             nodes[nnodes] = lp;
1552             childposns[nnodes] = child;
1553             nnodes++;
1554             lp = n;
1555             ld--;
1556         }
1557     } else {
1558         bt->root = bt_node_addr(bt, rp);
1559         bt->depth = rd;
1560         /* If the right tree is taller, search down its left-hand edge. */
1561         while (rd > ld) {
1562             nodeptr n = bt_write_lock_child(bt, rp, 0);
1563             nodes[nnodes] = rp;
1564             childposns[nnodes] = 0;
1565             nnodes++;
1566             rp = n;
1567             rd--;
1568         }
1569     }
1570
1571     /*
1572      * So we now want to combine nodes lp and rp into either one or
1573      * two plausibly-sized nodes, whichever is feasible. We have a
1574      * joining element `sep'.
1575      */
1576     lsub = (lp ? bt_subtrees(bt, lp) : 0);
1577     rsub = (rp ? bt_subtrees(bt, rp) : 0);
1578     if (lp && rp && lsub + rsub <= bt_max_subtrees(bt)) {
1579         node_addr la;
1580         /* Join the nodes into one. */
1581         bt_xform(bt, NODE_AS_IS, 0, lp, rp, sep,
1582                  NODE_ADDR_NULL, NODE_ADDR_NULL,
1583                  NODE_JOIN, &lp, NULL, NULL);
1584         /* Unlock the node. */
1585         la = bt_write_unlock(bt, lp);
1586         /* Update the child pointer in the next node up. */
1587         if (nnodes > 0)
1588             bt_set_child(bt, nodes[nnodes-1], childposns[nnodes-1], la);
1589         else
1590             bt->root = la;
1591     } else {
1592         node_addr la, ra;
1593         if (!lp || !rp) {
1594             la = NODE_ADDR_NULL;
1595             ra = NODE_ADDR_NULL;
1596         } else {
1597             int lsize, rsize;
1598             /* Re-split the nodes into two plausibly sized ones. */
1599             lsize = lsub + rsub;
1600             rsize = lsize / 2;
1601             lsize -= rsize;
1602             bt_xform(bt, NODE_AS_IS, 0, lp, rp, sep,
1603                      NODE_ADDR_NULL, NODE_ADDR_NULL,
1604                      lsize-1, &lp, &rp, &sep);
1605             /* Unlock the nodes. */
1606             la = bt_write_unlock(bt, lp);
1607             ra = bt_write_unlock(bt, rp);
1608         }
1609
1610         /*
1611          * Now we have to do the addition thing: progress up the
1612          * tree replacing a single subtree pointer with the
1613          * la/sep/ra assembly, until no more nodes have to split as
1614          * a result.
1615          */
1616         while (nnodes-- > 0) {
1617             nodeptr n = nodes[nnodes];
1618             if (bt_subtrees(bt, n) == bt_max_subtrees(bt)) {
1619                 /* Split the node and carry on up. */
1620                 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1621                          n, NULL, sep, la, ra,
1622                          bt_min_subtrees(bt), &lp, &rp, &sep);
1623                 la = bt_write_unlock(bt, lp);
1624                 ra = bt_write_unlock(bt, rp);
1625             } else {
1626                 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1627                          n, NULL, sep, la, ra,
1628                          NODE_JOIN, &n, NULL, NULL);
1629                 bt_write_unlock(bt, n);
1630                 break;
1631             }
1632         }
1633
1634         /*
1635          * If nnodes < 0, we have just split the root and we need
1636          * to build a new root node.
1637          */
1638         if (nnodes < 0)
1639             bt_new_root(bt, la, ra, sep);
1640     }
1641
1642     /*
1643      * Now we just need to go back up and unlock any remaining
1644      * nodes. Also here we ensure the root points where it should.
1645      */
1646     while (nnodes-- > 0) {
1647         node_addr na;
1648         na = bt_write_unlock(bt, nodes[nnodes]);
1649         if (nnodes == 0)
1650             bt->root = na;
1651     }
1652
1653     if (nodessize) {
1654         ifree(nodes);
1655         ifree(childposns);
1656     }
1657 }
1658
1659 /*
1660  * External interfaces to the join functionality: join and joinr
1661  * (differing only in which B-tree structure they leave without any
1662  * elements, and which they return the combined tree in).
1663  */
1664 btree *bt_join(btree *bt1, btree *bt2)
1665 {
1666     nodeptr root1, root2;
1667     int size2;
1668
1669     size2 = bt_count(bt2);
1670     if (size2 > 0) {
1671         bt_element_t sep;
1672
1673         if (bt1->cmp) {
1674             /*
1675              * The trees are ordered, so verify the ordering
1676              * condition: ensure nothing in bt1 is greater than or
1677              * equal to the minimum element in bt2.
1678              */
1679             sep = bt_index(bt2, 0);
1680             sep = bt_findrelpos(bt1, sep, NULL, BT_REL_GE, NULL);
1681             if (sep)
1682                 return NULL;
1683         }
1684
1685         sep = bt_delpos(bt2, 0);
1686         root1 = bt_write_lock_root(bt1);
1687         root2 = bt_write_lock_root(bt2);
1688         bt_join_internal(bt1, root1, root2, sep, bt1->depth, bt2->depth);
1689         bt2->root = NODE_ADDR_NULL;
1690         bt2->depth = 0;
1691     }
1692     return bt1;
1693 }
1694
1695 btree *bt_joinr(btree *bt1, btree *bt2)
1696 {
1697     nodeptr root1, root2;
1698     int size1;
1699
1700     size1 = bt_count(bt1);
1701     if (size1 > 0) {
1702         bt_element_t sep;
1703
1704         if (bt2->cmp) {
1705             /*
1706              * The trees are ordered, so verify the ordering
1707              * condition: ensure nothing in bt2 is less than or
1708              * equal to the maximum element in bt1.
1709              */
1710             sep = bt_index(bt1, size1-1);
1711             sep = bt_findrelpos(bt2, sep, NULL, BT_REL_LE, NULL);
1712             if (sep)
1713                 return NULL;
1714         }
1715
1716         sep = bt_delpos(bt1, size1-1);
1717         root1 = bt_write_lock_root(bt1);
1718         root2 = bt_write_lock_root(bt2);
1719         bt_join_internal(bt2, root1, root2, sep, bt1->depth, bt2->depth);
1720         bt1->root = NODE_ADDR_NULL;
1721         bt1->depth = 0;
1722     }
1723     return bt2;
1724 }
1725
1726 /*
1727  * Perform the healing process after a tree has been split. `rhs'
1728  * is set if the cut edge is the one on the right.
1729  */
1730 static void bt_split_heal(btree *bt, int rhs)
1731 {
1732     nodeptr n;
1733     nodeptr *nodes;
1734     int nnodes;
1735
1736     nodes = inewn(nodeptr, bt->depth);
1737     nnodes = 0;
1738
1739     n = bt_write_lock_root(bt);
1740
1741     /*
1742      * First dispense with completely trivial cases: a root node
1743      * containing only one subtree can be thrown away instantly.
1744      */
1745     while (n && bt_subtrees(bt, n) == 1) {
1746         nodeptr n2 = bt_write_lock_child(bt, n, 0);
1747         bt_shift_root(bt, n, bt_node_addr(bt, n2));
1748         n = n2;
1749     }
1750
1751     /*
1752      * Now we have a plausible root node. Start going down the cut
1753      * edge looking for undersized or minimum nodes, and arranging
1754      * for them to be above minimum size.
1755      */
1756     while (n) {
1757         int edge, next, elt, size_e, size_n, size_total;
1758         nodeptr ne, nn, nl, nr;
1759         bt_element_t el;
1760
1761         nodes[nnodes++] = n;
1762
1763         if (rhs) {
1764             edge = bt_subtrees(bt, n) - 1;
1765             next = edge - 1;
1766             elt = next;
1767         } else {
1768             edge = 0;
1769             next = 1;
1770             elt = edge;
1771         }
1772
1773         ne = bt_write_lock_child(bt, n, edge);
1774         if (!ne)
1775             break;
1776
1777         size_e = bt_subtrees(bt, ne);
1778
1779         if (size_e <= bt_min_subtrees(bt)) {
1780             nn = bt_write_lock_child(bt, n, next);
1781             el = bt_element(bt, n, elt);
1782             size_n = bt_subtrees(bt, nn);
1783             if (edge < next)
1784                 nl = ne, nr = nn;
1785             else
1786                 nl = nn, nr = ne;
1787             size_total = size_e + size_n;
1788             if (size_e + size_n <= bt_max_subtrees(bt)) {
1789                 /*
1790                  * Merge the edge node and its sibling together.
1791                  */
1792                 bt_xform(bt, NODE_AS_IS, 0, nl, nr, el,
1793                          NODE_ADDR_NULL, NODE_ADDR_NULL,
1794                          NODE_JOIN, &ne, NULL, NULL);
1795                 bt_xform(bt, NODE_DEL_ELT, elt, n, NULL, NULL,
1796                          bt_node_addr(bt, ne), NODE_ADDR_NULL,
1797                          NODE_JOIN, &n, NULL, NULL);
1798                 /*
1799                  * It's possible we've just trashed the root of the
1800                  * tree, again.
1801                  */
1802                 if (bt_subtrees(bt, n) == 1) {
1803                     bt_shift_root(bt, n, bt_node_addr(bt, ne));
1804                     nnodes--;          /* and take it out of nodes[] */
1805                 }
1806             } else {
1807                 /*
1808                  * Redistribute subtrees between the edge node and
1809                  * its sibling.
1810                  */
1811                 int split;
1812                 size_e = (size_total + 1) / 2;
1813                 assert(size_e > bt_min_subtrees(bt));
1814                 if (next < edge)
1815                     split = size_total - size_e - 1;
1816                 else
1817                     split = size_e - 1;
1818                 bt_xform(bt, NODE_AS_IS, 0, nl, nr, el,
1819                          NODE_ADDR_NULL, NODE_ADDR_NULL,
1820                          split, &nl, &nr, &el);
1821                 bt_write_unlock(bt, nn);
1822                 bt_set_element(bt, n, elt, el);
1823             }
1824         }
1825
1826         n = ne;
1827     }
1828
1829     /*
1830      * Now we just need to go back up and unlock any remaining
1831      * nodes.
1832      */
1833     while (nnodes-- > 0)
1834         bt_write_unlock(bt, nodes[nnodes]);
1835
1836     ifree(nodes);
1837 }
1838
1839 /*
1840  * Split a tree by numeric position. The new tree returned is the
1841  * one on the right; the original tree contains the stuff on the
1842  * left.
1843  */
1844 static btree *bt_split_internal(btree *bt1, int index)
1845 {
1846     btree *bt2;
1847     nodeptr *lnodes, *rnodes;
1848     nodeptr n1, n2, n;
1849     int nnodes, child;
1850
1851     bt2 = bt_new(bt1->cmp, bt1->copy, bt1->freeelt, bt1->propsize,
1852                  bt1->propalign, bt1->propmake, bt1->propmerge,
1853                  bt1->userstate, bt1->mindegree);
1854     bt2->depth = bt1->depth;
1855
1856     lnodes = inewn(nodeptr, bt1->depth);
1857     rnodes = inewn(nodeptr, bt2->depth);
1858     nnodes = 0;
1859
1860     n1 = bt_write_lock_root(bt1);
1861     while (n1) {
1862         child = bt_lookup_pos(bt1, n1, &index, NULL);
1863         n = bt_write_lock_child(bt1, n1, child);
1864         bt_xform(bt1, NODE_ADD_ELT, child, n1, NULL, NULL,
1865                  bt_node_addr(bt1, n), NODE_ADDR_NULL,
1866                  child, &n1, &n2, NULL);
1867         lnodes[nnodes] = n1;
1868         rnodes[nnodes] = n2;
1869         if (nnodes > 0)
1870             bt_set_child(bt2, rnodes[nnodes-1], 0, bt_node_addr(bt2, n2));
1871         else
1872             bt2->root = bt_node_addr(bt2, n2);
1873         nnodes++;
1874         n1 = n;
1875     }
1876
1877     /*
1878      * Now we go back up and unlock all the nodes. At this point we
1879      * don't mess with user properties, because there's the danger
1880      * of a node containing no subtrees _or_ elements and hence us
1881      * having to invent a notation for an empty property. We're
1882      * going to make a second healing pass in a moment anyway,
1883      * which will sort all that out for us.
1884      */
1885     while (nnodes-- > 0) {
1886         bt_write_unlock_internal(bt1, lnodes[nnodes], FALSE);
1887         bt_write_unlock_internal(bt2, rnodes[nnodes], FALSE);
1888     }
1889
1890     /*
1891      * Then we make a healing pass down each side of the tree.
1892      */
1893     bt_split_heal(bt1, TRUE);
1894     bt_split_heal(bt2, FALSE);
1895
1896     ifree(lnodes);
1897     ifree(rnodes);
1898
1899     return bt2;
1900 }
1901
1902 /*
1903  * Split a tree at a numeric index.
1904  */
1905 btree *bt_splitpos(btree *bt, int index, int before)
1906 {
1907     btree *ret;
1908     node_addr na;
1909     int count, nd;
1910     nodeptr n;
1911
1912     n = bt_read_lock_root(bt);
1913     count = (n ? bt_node_count(bt, n) : 0);
1914     bt_read_unlock(bt, n);
1915
1916     if (index < 0 || index > count)
1917         return NULL;
1918
1919     ret = bt_split_internal(bt, index);
1920     if (before) {
1921         na = bt->root;
1922         bt->root = ret->root;
1923         ret->root = na;
1924
1925         nd = bt->depth;
1926         bt->depth = ret->depth;
1927         ret->depth = nd;
1928     }
1929     return ret;
1930 }
1931
1932 /*
1933  * Split a tree at a position dictated by the sorting order.
1934  */
1935 btree *bt_split(btree *bt, bt_element_t element, cmpfn_t cmp, int rel)
1936 {
1937     int before, index;
1938
1939     assert(rel != BT_REL_EQ);          /* has to be an inequality */
1940
1941     if (rel == BT_REL_GT || rel == BT_REL_GE) {
1942         before = TRUE;
1943         rel = (rel == BT_REL_GT ? BT_REL_LE : BT_REL_LT);
1944     } else {
1945         before = FALSE;
1946     }
1947     if (!bt_findrelpos(bt, element, cmp, rel, &index))
1948         index = -1;
1949     return bt_splitpos(bt, index+1, before);
1950 }
1951
1952 #ifdef TEST
1953
1954 #define TEST_DEGREE 4
1955 #define BT_COPY bt_clone
1956 #define MAXTREESIZE 10000
1957 #define MAXLOCKS 100
1958
1959 int errors;
1960
1961 /*
1962  * Error reporting function.
1963  */
1964 void error(char *fmt, ...) {
1965     va_list ap;
1966     fprintf(stderr, "ERROR: ");
1967     va_start(ap, fmt);
1968     vfprintf(stderr, fmt, ap);
1969     va_end(ap);
1970     fprintf(stderr, "\n");
1971     errors++;
1972 }
1973
1974 /*
1975  * See if a tree has a 2-element root node.
1976  */
1977 static int bt_tworoot(btree *bt)
1978 {
1979     nodeptr n;
1980     int i;
1981     n = bt_read_lock_root(bt);
1982     i = bt_subtrees(bt, n);
1983     bt_read_unlock(bt, n);
1984     return (i == 2 ? TRUE : FALSE);
1985 }
1986
1987 /*
1988  * Physically copy an entire B-tree. (NB this appears as a test
1989  * routine rather than a production one, since reference counting
1990  * and bt_clone() provide a better way to do this for real code. If
1991  * anyone really needs a genuine physical copy for anything other
1992  * than testing reasons, I suppose they could always lift this into
1993  * the admin section above.)
1994  */
1995
1996 static nodeptr bt_copy_node(btree *bt, nodeptr n)
1997 {
1998     int i, children;
1999     nodeptr ret;
2000
2001     children = bt_subtrees(bt, n);
2002     ret = bt_new_node(bt, children);
2003
2004     for (i = 0; i < children; i++) {
2005         nodeptr n2 = bt_read_lock_child(bt, n, i);
2006         nodeptr n3;
2007         if (n2) {
2008             n3 = bt_copy_node(bt, n2);
2009             bt_set_child(bt, ret, i, bt_write_unlock(bt, n3));
2010         } else {
2011             bt_set_child(bt, ret, i, NODE_ADDR_NULL);
2012         }
2013         bt_read_unlock(bt, n2);
2014
2015         if (i < children-1) {
2016             bt_element_t e = bt_element(bt, n, i);
2017             if (bt->copy)
2018                 e = bt->copy(bt->userstate, e);
2019             bt_set_element(bt, ret, i, e);
2020         }
2021     }
2022
2023     return ret;
2024 }
2025
2026 btree *bt_copy(btree *bt)
2027 {
2028     nodeptr n;
2029     btree *bt2;
2030
2031     bt2 = bt_new(bt->cmp, bt->copy, bt->freeelt, bt->propsize, bt->propalign,
2032                  bt->propmake, bt->propmerge, bt->userstate, bt->mindegree);
2033     bt2->depth = bt->depth;
2034
2035     n = bt_read_lock_root(bt);
2036     if (n)
2037         bt2->root = bt_write_unlock(bt2, bt_copy_node(bt, n));
2038     bt_read_unlock(bt, n);
2039
2040     return bt2;
2041 }
2042
2043 /*
2044  * This function is intended to be called from gdb when debugging
2045  * things.
2046  */
2047 void bt_dump_nodes(btree *bt, ...)
2048 {
2049     int i, children;
2050     va_list ap;
2051     nodeptr n;
2052
2053     va_start(ap, bt);
2054     while (1) {
2055         n = va_arg(ap, nodeptr);
2056         if (!n)
2057             break;
2058         printf("%p [%d]:", n, n[bt->maxdegree*2+1].i);
2059         children = bt_subtrees(bt, n);
2060         for (i = 0; i < children; i++) {
2061             printf(" %p", bt_child(bt, n, i).p);
2062             if (i < children-1)
2063                 printf(" %s", (char *)bt_element(bt, n, i));
2064         }
2065         printf("\n");
2066     }
2067     va_end(ap);
2068 }
2069
2070 /*
2071  * Verify a tree against an array. Checks that:
2072  *
2073  *  - every node has a valid number of subtrees
2074  *  - subtrees are either all present (internal node) or all absent
2075  *    (leaf)
2076  *  - elements are all present
2077  *  - every leaf is at exactly the depth claimed by the tree
2078  *  - the tree represents the correct list of elements in the
2079  *    correct order. (This also tests the ordering constraint,
2080  *    assuming the array is correctly constructed.)
2081  */
2082
2083 void verifynode(btree *bt, nodeptr n, bt_element_t *array, int *arraypos,
2084                 int depth)
2085 {
2086     int subtrees, min, max, i, before, after, count;
2087
2088     /* Check the subtree count. The root can have as few as 2 subtrees. */
2089     subtrees = bt_subtrees(bt, n);
2090     max = bt_max_subtrees(bt);
2091     min = (depth == 1) ? 2 : bt_min_subtrees(bt);
2092     if (subtrees > max)
2093         error("node %p has too many subtrees (%d > %d)", n, subtrees, max);
2094     if (subtrees < min)
2095         error("node %p has too few subtrees (%d < %d)", n, subtrees, min);
2096
2097     /* Check that subtrees are present or absent as required. */
2098     for (i = 0; i < subtrees; i++) {
2099         node_addr child = bt_child(bt, n, i);
2100         if (depth == bt->depth && child.p != NULL)
2101             error("leaf node %p child %d is %p not NULL\n", n, i, child);
2102         if (depth != bt->depth && child.p == NULL)
2103             error("non-leaf node %p child %d is NULL\n", n, i);
2104     }
2105
2106     /* Check that elements are all present. */
2107     for (i = 0; i < subtrees-1; i++) {
2108         bt_element_t elt = bt_element(bt, n, i);
2109         if (elt == NULL)
2110             error("node %p element %d is NULL\n", n, i);
2111     }
2112
2113     before = *arraypos;
2114
2115     /* Now verify the subtrees, and simultaneously check the ordering. */
2116     for (i = 0; i < subtrees; i++) {
2117         if (depth < bt->depth) {
2118             nodeptr child = bt_read_lock_child(bt, n, i);
2119             verifynode(bt, child, array, arraypos, depth+1);
2120             bt_read_unlock(bt, child);
2121         }
2122         if (i < subtrees-1) {
2123             bt_element_t elt = bt_element(bt, n, i);
2124             if (array[*arraypos] != elt) {
2125                 error("node %p element %d is \"%s\", but array[%d]=\"%s\"",
2126                       n, i, elt, *arraypos, array[*arraypos]);
2127             }
2128             (*arraypos)++;
2129         }
2130     }
2131
2132     after = *arraypos;
2133
2134     /* Check the node count. */
2135     count = bt_node_count(bt, n);
2136     if (count != after - before)
2137         error("node %p count is %d, should be %d", n, count, after - before);
2138
2139     /*
2140      * Check the user properties.
2141      */
2142     {
2143         nodecomponent *prop;
2144         int i;
2145         int max = 0, total = 0;
2146
2147         prop = n + bt->maxdegree * 2 + 2;
2148
2149         for (i = before; i < after; i++) {
2150             int c = (unsigned char)*(char *)array[i];
2151
2152             if (max < c) max = c;
2153             total += c;
2154         }
2155
2156         if (prop[0].i != total)
2157             error("node %p total prop is %d, should be %d", n,
2158                   prop[0].i, total);
2159         if (prop[1].i != max)
2160             error("node %p max prop is %d, should be %d", n,
2161                   prop[1].i, max);
2162     }
2163 }
2164
2165 void verifytree(btree *bt, bt_element_t *array, int arraylen)
2166 {
2167     nodeptr n;
2168     int i = 0;
2169     n = bt_read_lock_root(bt);
2170     if (n) {
2171         verifynode(bt, n, array, &i, 1);
2172         bt_read_unlock(bt, n);
2173     } else {
2174         if (bt->depth != 0) {
2175             error("tree has null root but depth is %d not zero", bt->depth);
2176         }
2177     }
2178     if (i != arraylen)
2179         error("tree contains %d elements, array contains %d",
2180               i, arraylen);
2181     testlock(-1, 0, NULL);
2182 }
2183
2184 int mycmp(void *state, void *av, void *bv) {
2185     char const *a = (char const *)av;
2186     char const *b = (char const *)bv;
2187     return strcmp(a, b);
2188 }
2189
2190 static void set_invalid_property(void *propv)
2191 {
2192     int *prop = (int *)propv;
2193     prop[0] = prop[1] = -1;
2194 }
2195
2196 void mypropmake(void *state, void *av, void *destv)
2197 {
2198     char const *a = (char const *)av;
2199     int *dest = (int *)destv;
2200     dest[0] = dest[1] = (unsigned char)*a;
2201 }
2202
2203 void mypropmerge(void *state, void *s1v, void *s2v, void *destv)
2204 {
2205     int *s1 = (int *)s1v;
2206     int *s2 = (int *)s2v;
2207     int *dest = (int *)destv;
2208     if (!s1v && !s2v) {
2209         /* Special `destroy' case. */
2210         set_invalid_property(destv);
2211         return;
2212     }
2213     assert(s2[0] >= 0 && s2[1] >= 0);
2214     assert(s1 == NULL || (s1[0] >= 0 && s1[1] >= 0));
2215     dest[0] = s2[0] + (s1 ? s1[0] : 0);
2216     dest[1] = (s1 && s1[1] > s2[1] ? s1[1] : s2[1]);
2217 }
2218
2219 void array_addpos(bt_element_t *array, int *arraylen, bt_element_t e, int i)
2220 {
2221     bt_element_t e2;
2222     int len = *arraylen;
2223
2224     assert(len < MAXTREESIZE);
2225
2226     while (i < len) {
2227         e2 = array[i];
2228         array[i] = e;
2229         e = e2;
2230         i++;
2231     }
2232     array[len] = e;
2233     *arraylen = len+1;
2234 }
2235
2236 void array_add(bt_element_t *array, int *arraylen, bt_element_t e)
2237 {
2238     int i;
2239     int len = *arraylen;
2240
2241     for (i = 0; i < len; i++)
2242         if (mycmp(NULL, array[i], e) >= 0)
2243             break;
2244     assert(i == len || mycmp(NULL, array[i], e) != 0);
2245     array_addpos(array, arraylen, e, i);
2246 }
2247
2248 void array_delpos(bt_element_t *array, int *arraylen, int i)
2249 {
2250     int len = *arraylen;
2251
2252     while (i < len-1) {
2253         array[i] = array[i+1];
2254         i++;
2255     }
2256     *arraylen = len-1;
2257 }
2258
2259 bt_element_t array_del(bt_element_t *array, int *arraylen, bt_element_t e)
2260 {
2261     int i;
2262     int len = *arraylen;
2263     bt_element_t ret;
2264
2265     for (i = 0; i < len; i++)
2266         if (mycmp(NULL, array[i], e) >= 0)
2267             break;
2268     if (i < len && mycmp(NULL, array[i], e) == 0) {
2269         ret = array[i];
2270         array_delpos(array, arraylen, i);
2271     } else
2272         ret = NULL;
2273     return ret;
2274 }
2275
2276 /* A sample data set and test utility. Designed for pseudo-randomness,
2277  * and yet repeatability. */
2278
2279 /*
2280  * This random number generator uses the `portable implementation'
2281  * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
2282  * change it if not.
2283  */
2284 int randomnumber(unsigned *seed) {
2285     *seed *= 1103515245;
2286     *seed += 12345;
2287     return ((*seed) / 65536) % 32768;
2288 }
2289
2290 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
2291
2292 char *strings[] = {
2293     "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
2294     "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
2295     "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
2296     "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
2297     "m", "s", "l", "4",
2298 };
2299
2300 #define NSTR lenof(strings)
2301
2302 void findtest(btree *tree, bt_element_t *array, int arraylen)
2303 {
2304     static const int rels[] = {
2305         BT_REL_EQ, BT_REL_GE, BT_REL_LE, BT_REL_LT, BT_REL_GT
2306     };
2307     static const char *const relnames[] = {
2308         "EQ", "GE", "LE", "LT", "GT"
2309     };
2310     int i, j, rel, index;
2311     char *p, *ret, *realret, *realret2;
2312     int lo, hi, mid, c;
2313
2314     for (i = 0; i < (int)NSTR; i++) {
2315         p = strings[i];
2316         for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
2317             rel = rels[j];
2318
2319             lo = 0; hi = arraylen-1;
2320             while (lo <= hi) {
2321                 mid = (lo + hi) / 2;
2322                 c = strcmp(p, array[mid]);
2323                 if (c < 0)
2324                     hi = mid-1;
2325                 else if (c > 0)
2326                     lo = mid+1;
2327                 else
2328                     break;
2329             }
2330
2331             if (c == 0) {
2332                 if (rel == BT_REL_LT)
2333                     ret = (mid > 0 ? array[--mid] : NULL);
2334                 else if (rel == BT_REL_GT)
2335                     ret = (mid < arraylen-1 ? array[++mid] : NULL);
2336                 else
2337                     ret = array[mid];
2338             } else {
2339                 assert(lo == hi+1);
2340                 if (rel == BT_REL_LT || rel == BT_REL_LE) {
2341                     mid = hi;
2342                     ret = (hi >= 0 ? array[hi] : NULL);
2343                 } else if (rel == BT_REL_GT || rel == BT_REL_GE) {
2344                     mid = lo;
2345                     ret = (lo < arraylen ? array[lo] : NULL);
2346                 } else
2347                     ret = NULL;
2348             }
2349
2350             realret = bt_findrelpos(tree, p, NULL, rel, &index);
2351             testlock(-1, 0, NULL);
2352             if (realret != ret) {
2353                 error("find(\"%s\",%s) gave %s should be %s",
2354                       p, relnames[j], realret, ret);
2355             }
2356             if (realret && index != mid) {
2357                 error("find(\"%s\",%s) gave %d should be %d",
2358                       p, relnames[j], index, mid);
2359             }
2360             if (realret && rel == BT_REL_EQ) {
2361                 realret2 = bt_index(tree, index);
2362                 if (realret2 != realret) {
2363                     error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
2364                           p, relnames[j], realret, index, index, realret2);
2365                 }
2366             }
2367         }
2368     }
2369
2370     realret = bt_findrelpos(tree, NULL, NULL, BT_REL_GT, &index);
2371     testlock(-1, 0, NULL);
2372     if (arraylen && (realret != array[0] || index != 0)) {
2373         error("find(NULL,GT) gave %s(%d) should be %s(0)",
2374               realret, index, array[0]);
2375     } else if (!arraylen && (realret != NULL)) {
2376         error("find(NULL,GT) gave %s(%d) should be NULL",
2377               realret, index);
2378     }
2379
2380     realret = bt_findrelpos(tree, NULL, NULL, BT_REL_LT, &index);
2381     testlock(-1, 0, NULL);
2382     if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
2383         error("find(NULL,LT) gave %s(%d) should be %s(0)",
2384               realret, index, array[arraylen-1]);
2385     } else if (!arraylen && (realret != NULL)) {
2386         error("find(NULL,LT) gave %s(%d) should be NULL",
2387               realret, index);
2388     }
2389 }
2390
2391 void splittest(btree *tree, bt_element_t *array, int arraylen)
2392 {
2393     int i;
2394     btree *tree3, *tree4;
2395     for (i = 0; i <= arraylen; i++) {
2396         printf("splittest: %d\n", i);
2397         tree3 = BT_COPY(tree);
2398         testlock(-1, 0, NULL);
2399         tree4 = bt_splitpos(tree3, i, 0);
2400         testlock(-1, 0, NULL);
2401         verifytree(tree3, array, i);
2402         verifytree(tree4, array+i, arraylen-i);
2403         bt_join(tree3, tree4);
2404         testlock(-1, 0, NULL);
2405         verifytree(tree4, NULL, 0);
2406         bt_free(tree4);        /* left empty by join */
2407         testlock(-1, 0, NULL);
2408         verifytree(tree3, array, arraylen);
2409         bt_free(tree3);
2410         testlock(-1, 0, NULL);
2411     }
2412 }
2413
2414 /*
2415  * Called to track read and write locks on nodes.
2416  */
2417 void testlock(int write, int set, nodeptr n)
2418 {
2419     static nodeptr readlocks[MAXLOCKS], writelocks[MAXLOCKS];
2420     static int nreadlocks = 0, nwritelocks = 0;
2421
2422     int i, rp, wp;
2423
2424     if (write == -1) {
2425         /* Called after an operation to ensure all locks are unlocked. */
2426         if (nreadlocks != 0 || nwritelocks != 0)
2427             error("at least one left-behind lock exists!");
2428         return;
2429     }
2430
2431     /* Locking NULL does nothing. Unlocking it is an error. */
2432     if (n == NULL) {
2433         if (!set)
2434             error("attempting to %s-unlock NULL", write ? "write" : "read");
2435         return;
2436     }
2437
2438     assert(nreadlocks < MAXLOCKS && nwritelocks < MAXLOCKS);
2439
2440     /* First look for the node in both lock lists. */
2441     rp = wp = -1;
2442     for (i = 0; i < nreadlocks; i++)
2443         if (readlocks[i] == n)
2444             rp = i;
2445     for (i = 0; i < nwritelocks; i++)
2446         if (writelocks[i] == n)
2447             wp = i;
2448
2449     /* Now diverge based on what we're supposed to be up to. */
2450     if (set) {
2451         /* Setting a lock. Should not already be locked in either list. */
2452         if (rp != -1 || wp != -1) {
2453             error("attempt to %s-lock node %p, already %s-locked",
2454                   (write ? "write" : "read"), n, (rp==-1 ? "write" : "read"));
2455         }
2456         if (write)
2457             writelocks[nwritelocks++] = n;
2458         else
2459             readlocks[nreadlocks++] = n;
2460     } else {
2461         /* Clearing a lock. Should exist in exactly the correct list. */
2462         if (write && rp != -1)
2463             error("attempt to write-unlock node %p which is read-locked", n);
2464         if (!write && wp != -1)
2465             error("attempt to read-unlock node %p which is write-locked", n);
2466         if (wp != -1) {
2467             nwritelocks--;
2468             for (i = wp; i < nwritelocks; i++)
2469                 writelocks[i] = writelocks[i+1];
2470         }
2471         if (rp != -1) {
2472             nreadlocks--;
2473             for (i = rp; i < nreadlocks; i++)
2474                 readlocks[i] = readlocks[i+1];
2475         }
2476     }
2477 }
2478
2479 int main(void) {
2480     int in[NSTR];
2481     int i, j, k;
2482     int tworoot, tmplen;
2483     unsigned seed = 0;
2484     bt_element_t *array;
2485     int arraylen;
2486     bt_element_t ret, ret2, item;
2487     btree *tree, *tree2, *tree3, *tree4;
2488
2489     setvbuf(stdout, NULL, _IOLBF, 0);
2490     setvbuf(stderr, NULL, _IOLBF, 0);
2491     errors = 0;
2492
2493     for (i = 0; i < (int)NSTR; i++) in[i] = 0;
2494     array = newn(bt_element_t, MAXTREESIZE);
2495     arraylen = 0;
2496     tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2497                   mypropmake, mypropmerge, NULL, TEST_DEGREE);
2498
2499     verifytree(tree, array, arraylen);
2500     for (i = 0; i < 10000; i++) {
2501         j = randomnumber(&seed);
2502         j %= NSTR;
2503         printf("trial: %d\n", i);
2504         if (in[j]) {
2505             printf("deleting %s (%d)\n", strings[j], j);
2506             ret2 = array_del(array, &arraylen, strings[j]);
2507             ret = bt_del(tree, strings[j]);
2508             testlock(-1, 0, NULL);
2509             assert((bt_element_t)strings[j] == ret && ret == ret2);
2510             verifytree(tree, array, arraylen);
2511             in[j] = 0;
2512         } else {
2513             printf("adding %s (%d)\n", strings[j], j);
2514             array_add(array, &arraylen, strings[j]);
2515             ret = bt_add(tree, strings[j]);
2516             testlock(-1, 0, NULL);
2517             assert(strings[j] == ret);
2518             verifytree(tree, array, arraylen);
2519             in[j] = 1;
2520         }
2521         /* disptree(tree); */
2522         findtest(tree, array, arraylen);
2523     }
2524
2525     while (arraylen > 0) {
2526         j = randomnumber(&seed);
2527         j %= arraylen;
2528         item = array[j];
2529         ret2 = array_del(array, &arraylen, item);
2530         ret = bt_del(tree, item);
2531         testlock(-1, 0, NULL);
2532         assert(ret2 == ret);
2533         verifytree(tree, array, arraylen);
2534     }
2535
2536     bt_free(tree);
2537     testlock(-1, 0, NULL);
2538
2539     /*
2540      * Now try an unsorted tree. We don't really need to test
2541      * delpos because we know del is based on it, so it's already
2542      * been tested in the above sorted-tree code; but for
2543      * completeness we'll use it to tear down our unsorted tree
2544      * once we've built it.
2545      */
2546     tree = bt_new(NULL, NULL, NULL, 2*sizeof(int), alignof(int),
2547                   mypropmake, mypropmerge, NULL, TEST_DEGREE);
2548     verifytree(tree, array, arraylen);
2549     for (i = 0; i < 1000; i++) {
2550         printf("trial: %d\n", i);
2551         j = randomnumber(&seed);
2552         j %= NSTR;
2553         k = randomnumber(&seed);
2554         k %= bt_count(tree)+1;
2555         testlock(-1, 0, NULL);
2556         printf("adding string %s at index %d\n", strings[j], k);
2557         array_addpos(array, &arraylen, strings[j], k);
2558         bt_addpos(tree, strings[j], k);
2559         testlock(-1, 0, NULL);
2560         verifytree(tree, array, arraylen);
2561     }
2562
2563     /*
2564      * While we have this tree in its full form, we'll take a copy
2565      * of it to use in split and join testing.
2566      */
2567     tree2 = BT_COPY(tree);
2568     testlock(-1, 0, NULL);
2569     verifytree(tree2, array, arraylen);/* check the copy is accurate */
2570     /*
2571      * Split tests. Split the tree at every possible point and
2572      * check the resulting subtrees.
2573      */
2574     tworoot = bt_tworoot(tree2);       /* see if it has a 2-root */
2575     testlock(-1, 0, NULL);
2576     splittest(tree2, array, arraylen);
2577     /*
2578      * Now do the split test again, but on a tree that has a 2-root
2579      * (if the previous one didn't) or doesn't (if the previous one
2580      * did).
2581      */
2582     tmplen = arraylen;
2583     while (bt_tworoot(tree2) == tworoot) {
2584         bt_delpos(tree2, --tmplen);
2585         testlock(-1, 0, NULL);
2586     }
2587     printf("now trying splits on second tree\n");
2588     splittest(tree2, array, tmplen);
2589     bt_free(tree2);
2590     testlock(-1, 0, NULL);
2591
2592     /*
2593      * Back to the main testing of uncounted trees.
2594      */
2595     while (bt_count(tree) > 0) {
2596         printf("cleanup: tree size %d\n", bt_count(tree));
2597         j = randomnumber(&seed);
2598         j %= bt_count(tree);
2599         printf("deleting string %s from index %d\n", (char *)array[j], j);
2600         ret = bt_delpos(tree, j);
2601         testlock(-1, 0, NULL);
2602         assert((bt_element_t)array[j] == ret);
2603         array_delpos(array, &arraylen, j);
2604         verifytree(tree, array, arraylen);
2605     }
2606     bt_free(tree);
2607     testlock(-1, 0, NULL);
2608
2609     /*
2610      * Finally, do some testing on split/join on _sorted_ trees. At
2611      * the same time, we'll be testing split on very small trees.
2612      */
2613     tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2614                   mypropmake, mypropmerge, NULL, TEST_DEGREE);
2615     arraylen = 0;
2616     for (i = 0; i < 16; i++) {
2617         array_add(array, &arraylen, strings[i]);
2618         ret = bt_add(tree, strings[i]);
2619         testlock(-1, 0, NULL);
2620         assert(strings[i] == ret);
2621         verifytree(tree, array, arraylen);
2622         tree2 = BT_COPY(tree);
2623         splittest(tree2, array, arraylen);
2624         testlock(-1, 0, NULL);
2625         bt_free(tree2);
2626         testlock(-1, 0, NULL);
2627     }
2628     bt_free(tree);
2629     testlock(-1, 0, NULL);
2630
2631     /*
2632      * Test silly cases of join: join(emptytree, emptytree), and
2633      * also ensure join correctly spots when sorted trees fail the
2634      * ordering constraint.
2635      */
2636     tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2637                   mypropmake, mypropmerge, NULL, TEST_DEGREE);
2638     tree2 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2639                    mypropmake, mypropmerge, NULL, TEST_DEGREE);
2640     tree3 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2641                    mypropmake, mypropmerge, NULL, TEST_DEGREE);
2642     tree4 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2643                    mypropmake, mypropmerge, NULL, TEST_DEGREE);
2644     assert(mycmp(NULL, strings[0], strings[1]) < 0);   /* just in case :-) */
2645     bt_add(tree2, strings[1]);
2646     testlock(-1, 0, NULL);
2647     bt_add(tree4, strings[0]);
2648     testlock(-1, 0, NULL);
2649     array[0] = strings[0];
2650     array[1] = strings[1];
2651     verifytree(tree, array, 0);
2652     verifytree(tree2, array+1, 1);
2653     verifytree(tree3, array, 0);
2654     verifytree(tree4, array, 1);
2655
2656     /*
2657      * So:
2658      *  - join(tree,tree3) should leave both tree and tree3 unchanged.
2659      *  - joinr(tree,tree2) should leave both tree and tree2 unchanged.
2660      *  - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
2661      *  - join(tree, tree2) should move the element from tree2 to tree.
2662      *  - joinr(tree4, tree3) should move the element from tree4 to tree3.
2663      *  - join(tree,tree3) should return NULL and leave both unchanged.
2664      *  - join(tree3,tree) should work and create a bigger tree in tree3.
2665      */
2666     assert(tree == bt_join(tree, tree3));
2667     testlock(-1, 0, NULL);
2668     verifytree(tree, array, 0);
2669     verifytree(tree3, array, 0);
2670     assert(tree2 == bt_joinr(tree, tree2));
2671     testlock(-1, 0, NULL);
2672     verifytree(tree, array, 0);
2673     verifytree(tree2, array+1, 1);
2674     assert(tree4 == bt_join(tree4, tree3));
2675     testlock(-1, 0, NULL);
2676     verifytree(tree3, array, 0);
2677     verifytree(tree4, array, 1);
2678     assert(tree == bt_join(tree, tree2));
2679     testlock(-1, 0, NULL);
2680     verifytree(tree, array+1, 1);
2681     verifytree(tree2, array, 0);
2682     assert(tree3 == bt_joinr(tree4, tree3));
2683     testlock(-1, 0, NULL);
2684     verifytree(tree3, array, 1);
2685     verifytree(tree4, array, 0);
2686     assert(NULL == bt_join(tree, tree3));
2687     testlock(-1, 0, NULL);
2688     verifytree(tree, array+1, 1);
2689     verifytree(tree3, array, 1);
2690     assert(tree3 == bt_join(tree3, tree));
2691     testlock(-1, 0, NULL);
2692     verifytree(tree3, array, 2);
2693     verifytree(tree, array, 0);
2694
2695     bt_free(tree);
2696     testlock(-1, 0, NULL);
2697     bt_free(tree2);
2698     testlock(-1, 0, NULL);
2699     bt_free(tree3);
2700     testlock(-1, 0, NULL);
2701     bt_free(tree4);
2702     testlock(-1, 0, NULL);
2703
2704     sfree(array);
2705
2706     if (errors)
2707         fprintf(stderr, "%d errors!\n", errors);
2708     return (errors != 0 ? 1 : 0);
2709 }
2710
2711 #endif