Remove an unused variable spotted by Ubuntu 12.04's gcc.
[sgt/library] / btree.c
CommitLineData
e7f01466 1/*
2 * Flexible B-tree implementation. Supports reference counting for
3 * copy-on-write, user-defined node properties, and variable
4 * degree.
5 *
6 * This file is copyright 2001,2004 Simon Tatham.
7 *
8 * Permission is hereby granted, free of charge, to any person
9 * obtaining a copy of this software and associated documentation
10 * files (the "Software"), to deal in the Software without
11 * restriction, including without limitation the rights to use,
12 * copy, modify, merge, publish, distribute, sublicense, and/or
13 * sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following
15 * conditions:
16 *
17 * The above copyright notice and this permission notice shall be
18 * included in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
25 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * SOFTWARE.
28 */
29
30/*
31 * TODO:
32 *
33 * Possibly TODO in future, but may not be sensible in this code
34 * architecture:
35 *
36 * - user write properties.
37 * * this all happens during write_unlock(), I think. Except
38 * that we'll now need an _internal_ write_unlock() which
39 * does everything except user write properties. Sigh.
40 * * note that we also need a transform function for elements
41 * (rot13 will certainly require this, and reverse will
42 * require it if the elements themselves are in some way
43 * reversible).
44 *
45 * Still untested:
46 * - searching on user read properties.
47 * - user-supplied copy function.
48 * - bt_add when element already exists.
49 * - bt_del when element doesn't.
50 * - splitpos with before==TRUE.
51 * - split() on sorted elements (but it should be fine).
52 * - bt_replace, at all (it won't be useful until we get user read
53 * properties).
54 * - bt_index_w (won't make much sense until we start using
55 * user-supplied copy fn).
56 */
57
58#include <stdlib.h>
59#include <string.h>
60#include <assert.h>
61
62#ifdef TEST
63#include <stdio.h>
64#include <stdarg.h>
65#endif
66
67#include "btree.h"
68
69#ifdef TEST
70static void set_invalid_property(void *prop);
71#endif
72
73/* ----------------------------------------------------------------------
74 * Type definitions.
75 */
76
77typedef union nodecomponent nodecomponent;
78typedef nodecomponent *nodeptr;
79
80/*
81 * For type-checking purposes, and to ensure I don't accidentally
82 * confuse node_addr with node_ptr during implementation, I'll
83 * define node_addr for the in-memory case as being a struct
84 * containing only a nodeptr.
85 *
86 * This unfortunately needs to go in btree.h so that clients
87 * writing user properties can know about the nodecomponent
88 * structure.
89 */
90typedef struct {
91 nodeptr p;
92} node_addr;
93
94/*
95 * A B-tree node is a horrible thing when you're trying to be
96 * flexible. It is of variable size, and it contains a variety of
97 * distinct types of thing: nodes, elements, some counters, some
98 * user-defined properties ... it's a horrible thing. So we define
99 * it as an array of unions, each union being either an `int' or a
100 * `bt_element_t' or a `node_addr'...
101 */
102
103union nodecomponent {
104 int i;
105 node_addr na;
106 bt_element_t ep;
107};
108
109static const node_addr NODE_ADDR_NULL = { NULL };
110
111/*
112 * The array of nodecomponents will take the following form:
113 *
114 * - (maxdegree) child pointers.
115 * - (maxdegree-1) element pointers.
116 * - one subtree count (current number of child pointers that are
117 * valid; note that `valid' doesn't imply non-NULL).
118 * - one element count.
119 * - one reference count.
120 */
121
122struct btree {
123 int mindegree; /* min number of subtrees */
124 int maxdegree; /* max number of subtrees */
125 int depth; /* helps to store this explicitly */
126 node_addr root;
127 cmpfn_t cmp;
128 copyfn_t copy;
129 freefn_t freeelt;
130 int propsize, propalign, propoffset;
131 propmakefn_t propmake;
132 propmergefn_t propmerge;
133 void *userstate; /* passed to all user functions */
134};
135
136/* ----------------------------------------------------------------------
137 * Memory management routines and other housekeeping.
138 */
139#ifdef HAVE_ALLOCA
140# define ialloc(x) alloca(x)
141# define ifree(x)
142#else
143# define ialloc(x) smalloc(x)
144# define ifree(x) sfree(x)
145#endif
146
147#define new1(t) ( (t *) smalloc(sizeof(t)) )
148#define newn(t, n) ( (t *) smalloc((n) * sizeof(t)) )
149#define inew1(t) ( (t *) ialloc(sizeof(t)) )
150#define inewn(t, n) ( (t *) ialloc((n) * sizeof(t)) )
151
152static void *smalloc(size_t size)
153{
154 void *ret = malloc(size);
155 if (!ret)
156 abort();
157 return ret;
158}
159
160static void sfree(void *p)
161{
162 free(p);
163}
164
165#ifndef FALSE
166#define FALSE 0
167#endif
168#ifndef TRUE
169#define TRUE 1
170#endif
171
172/* We could probably do with more compiler-specific branches of this #if. */
173#if defined(__GNUC__)
174#define INLINE __inline
175#else
176#define INLINE
177#endif
178
179/* Hooks into the low-level code for test purposes. */
180#ifdef TEST
181void testlock(int write, int set, nodeptr n);
182#else
183#define testlock(w,s,n)
184#endif
185
186/* ----------------------------------------------------------------------
187 * Low-level helper routines, which understand the in-memory format
188 * of a node and know how to read-lock and write-lock.
189 */
190
191/*
192 * Read and write the node_addr of a child.
193 */
194static INLINE node_addr bt_child(btree *bt, nodeptr n, int index)
195{
196 return n[index].na;
197}
198static INLINE void bt_set_child(btree *bt, nodeptr n,
199 int index, node_addr value)
200{
201 n[index].na = value;
202}
203
204/*
205 * Read and write the address of an element.
206 */
207static INLINE bt_element_t bt_element(btree *bt, nodeptr n, int index)
208{
209 return n[bt->maxdegree + index].ep;
210}
211static INLINE void bt_set_element(btree *bt, nodeptr n,
212 int index, bt_element_t value)
213{
214 n[bt->maxdegree + index].ep = value;
215}
216
217/*
218 * Give the number of subtrees currently present in an element.
219 */
220static INLINE int bt_subtrees(btree *bt, nodeptr n)
221{
222 return n[bt->maxdegree*2-1].i;
223}
224#define bt_elements(bt,n) (bt_subtrees(bt,n) - 1)
225
226/*
227 * Give the minimum and maximum number of subtrees allowed in a
228 * node.
229 */
230static INLINE int bt_min_subtrees(btree *bt)
231{
232 return bt->mindegree;
233}
234static INLINE int bt_max_subtrees(btree *bt)
235{
236 return bt->maxdegree;
237}
238
239/*
240 * Return the count of items, and the user properties, in a
241 * particular subtree of a node.
242 *
243 * Note that in the in-memory form of the tree, this breaks the
244 * read-locking semantics, by reading the counts out of the child
245 * nodes without bothering to lock them. We're allowed to do this
246 * because this function is implemented at the same very low level
247 * as the implementation of bt_read_lock(), so we're allowed to
248 * know that read locking actually doesn't do anything.
249 */
250static INLINE int bt_child_count(btree *bt, nodeptr n, int index)
251{
252 if (n[index].na.p)
253 return n[index].na.p[bt->maxdegree*2].i;
254 else
255 return 0;
256}
257
258static INLINE void *bt_child_prop(btree *bt, nodeptr n, int index)
259{
260 if (n[index].na.p)
261 return (char *)n[index].na.p + bt->propoffset;
262 else
263 return NULL;
264}
265
266/*
267 * Return the count of items in a whole node.
268 */
269static INLINE int bt_node_count(btree *bt, nodeptr n)
270{
271 return n[bt->maxdegree*2].i;
272}
273
274/*
275 * Determine whether a node is a leaf node or not.
276 */
277static INLINE int bt_is_leaf(btree *bt, nodeptr n)
278{
279 return n[0].na.p == NULL;
280}
281
282/*
283 * Create a new write-locked node, and return a pointer to it.
284 */
285static INLINE nodeptr bt_new_node(btree *bt, int nsubtrees)
286{
287 nodeptr ret = (nodecomponent *)smalloc(bt->propoffset + bt->propsize);
288 ret[bt->maxdegree*2-1].i = nsubtrees;
289 ret[bt->maxdegree*2+1].i = 1; /* reference count 1 */
290#ifdef TEST
291 set_invalid_property(ret + bt->maxdegree * 2 + 2);
292#else
293 memset((char *)ret + bt->propoffset, 0, bt->propsize);
294#endif
295 testlock(TRUE, TRUE, ret);
296 return ret;
297}
298
299/*
300 * Destroy a node (must be write-locked).
301 */
302static INLINE void bt_destroy_node(btree *bt, nodeptr n)
303{
304 testlock(TRUE, FALSE, n);
305 /* Free the property. */
306 bt->propmerge(bt->userstate, NULL, NULL, n + bt->maxdegree * 2 + 2);
307 sfree(n);
308}
309
310/*
311 * Take an existing node and prepare to re-use it in a new context.
312 */
313static INLINE nodeptr bt_reuse_node(btree *bt, nodeptr n, int nsubtrees)
314{
315 testlock(TRUE, FALSE, n);
316 testlock(TRUE, TRUE, n);
317 n[bt->maxdegree*2-1].i = nsubtrees;
318 return n;
319}
320
321/*
322 * Return an extra reference to a node, for purposes of cloning. So
323 * we have to update its reference count as well.
324 */
325static INLINE node_addr bt_ref_node(btree *bt, node_addr n)
326{
327 if (n.p)
328 n.p[bt->maxdegree*2+1].i++;
329 return n;
330}
331
332/*
333 * Drop a node's reference count, for purposes of freeing. Returns
334 * the new reference count. Typically this will be tested against
335 * zero to see if the node needs to be physically freed; hence a
336 * NULL node_addr causes a return of 1 (because this isn't
337 * necessary).
338 */
339static INLINE int bt_unref_node(btree *bt, node_addr n)
340{
341 if (n.p) {
342 n.p[bt->maxdegree*2+1].i--;
343 return n.p[bt->maxdegree*2+1].i;
344 } else
345 return 1; /* a NULL node is considered OK */
346}
347
348/*
349 * Clone a node during write unlocking, if its reference count is
350 * more than one.
351 */
352static nodeptr bt_clone_node(btree *bt, nodeptr n)
353{
354 int i;
355 nodeptr ret = (nodecomponent *)smalloc(bt->propoffset + bt->propsize);
356 memcpy(ret, n, (bt->maxdegree*2+1) * sizeof(nodecomponent));
357 if (bt->copy) {
358 for (i = 0; i < bt_elements(bt, ret); i++) {
359 bt_element_t *e = bt_element(bt, ret, i);
360 bt_set_element(bt, ret, i, bt->copy(bt->userstate, e));
361 }
362 }
363 ret[bt->maxdegree*2+1].i = 1; /* clone has reference count 1 */
364 n[bt->maxdegree*2+1].i--; /* drop original's ref count by one */
365 /*
366 * At this low level, we're allowed to reach directly into the
367 * subtrees to fiddle with their reference counts without
368 * having to lock them.
369 */
370 for (i = 0; i < bt_subtrees(bt, ret); i++) {
371 node_addr na = bt_child(bt, ret, i);
372 if (na.p)
373 na.p[bt->maxdegree*2+1].i++; /* inc ref count of each child */
374 }
375 /*
376 * Copy the user property explicitly (in case it contains a
377 * pointer to an allocated area).
378 */
379 memset((char *)ret + bt->propoffset, 0, bt->propsize);
380 bt->propmerge(bt->userstate, NULL, n + bt->maxdegree * 2 + 2,
381 ret + bt->maxdegree * 2 + 2);
382 return ret;
383}
384
385/*
386 * Return the node_addr for a currently locked node. NB that this
387 * means node movement must take place during _locking_ rather than
388 * unlocking!
389 */
390static INLINE node_addr bt_node_addr(btree *bt, nodeptr n)
391{
392 node_addr ret;
393 ret.p = n;
394 return ret;
395}
396
397/*
398 * The bt_write_lock and bt_read_lock functions should gracefully
399 * handle being asked to write-lock a null node pointer, and just
400 * return a null nodeptr.
401 */
402static INLINE nodeptr bt_write_lock_child(btree *bt, nodeptr a, int index)
403{
404 node_addr addr = bt_child(bt, a, index);
405 if (addr.p && addr.p[bt->maxdegree*2+1].i > 1) {
406 nodeptr clone = bt_clone_node(bt, addr.p);
407 bt_set_child(bt, a, index, bt_node_addr(bt, clone));
408 testlock(TRUE, TRUE, clone);
409 return clone;
410 }
411 testlock(TRUE, TRUE, addr.p);
412 return addr.p;
413}
414static INLINE nodeptr bt_write_lock_root(btree *bt)
415{
416 node_addr addr = bt->root;
417 if (addr.p && addr.p[bt->maxdegree*2+1].i > 1) {
418 nodeptr clone = bt_clone_node(bt, addr.p);
419 bt->root = bt_node_addr(bt, clone);
420 testlock(TRUE, TRUE, clone);
421 return clone;
422 }
423 testlock(TRUE, TRUE, addr.p);
424 return addr.p;
425}
426static INLINE nodeptr bt_read_lock(btree *bt, node_addr a)
427{
428 testlock(FALSE, TRUE, a.p);
429 return a.p;
430}
431#define bt_read_lock_root(bt) (bt_read_lock(bt, (bt)->root))
432#define bt_read_lock_child(bt,a,index) (bt_read_lock(bt,bt_child(bt,a,index)))
433
434static INLINE void bt_write_relock(btree *bt, nodeptr n, int props)
435{
436 int i, ns, count;
437
438 /*
439 * Update the count in the node.
440 */
441 ns = bt_subtrees(bt, n);
442 count = ns-1; /* count the elements */
443 for (i = 0; i < ns; i++)
444 count += bt_child_count(bt, n, i);
445 n[bt->maxdegree*2].i = count;
446 testlock(TRUE, FALSE, n);
447 testlock(TRUE, TRUE, n);
448
449 /*
450 * Update user read properties.
451 */
452 if (props && bt->propsize) {
453 void *prevprop, *eltprop, *thisprop, *childprop;
454
455 prevprop = NULL;
456 eltprop = ialloc(bt->propsize);
457 thisprop = (void *)((char *)n + bt->propoffset);
458
459 for (i = 0; i < ns; i++) {
460 /* Merge a subtree's property into this one.
461 * Initially prevprop==NULL, meaning to just copy. */
462 if ( (childprop = bt_child_prop(bt, n, i)) != NULL ) {
463 bt->propmerge(bt->userstate, prevprop, childprop, thisprop);
464 prevprop = thisprop;
465 }
466
467 if (i < ns-1) {
468 /* Now merge in the separating element. */
469 bt->propmake(bt->userstate, bt_element(bt, n, i), eltprop);
470 bt->propmerge(bt->userstate, prevprop, eltprop, thisprop);
471 prevprop = thisprop;
472 }
473 }
474
475 ifree(eltprop);
476 }
477}
478
479static INLINE node_addr bt_write_unlock_internal(btree *bt, nodeptr n,
480 int props)
481{
482 node_addr ret;
483
484 bt_write_relock(bt, n, props);
485
486 testlock(TRUE, FALSE, n);
487
488 ret.p = n;
489 return ret;
490}
491
492static INLINE node_addr bt_write_unlock(btree *bt, nodeptr n)
493{
494 return bt_write_unlock_internal(bt, n, TRUE);
495}
496
497static INLINE void bt_read_unlock(btree *bt, nodeptr n)
498{
499 /*
500 * For trees in memory, we do nothing here, except run some
501 * optional testing.
502 */
503 testlock(FALSE, FALSE, n);
504}
505
506/* ----------------------------------------------------------------------
507 * Higher-level helper functions, which should be independent of
508 * the knowledge of precise node structure in the above code.
509 */
510
511/*
512 * Return the count of items below a node that appear before the
513 * start of a given subtree.
514 */
515static int bt_child_startpos(btree *bt, nodeptr n, int index)
516{
517 int pos = 0;
518
519 while (index > 0) {
520 index--;
521 pos += bt_child_count(bt, n, index) + 1; /* 1 for separating elt */
522 }
523 return pos;
524}
525
526/*
527 * Create a new root node for a tree.
528 */
529static void bt_new_root(btree *bt, node_addr left, node_addr right,
530 bt_element_t element)
531{
532 nodeptr n;
533 n = bt_new_node(bt, 2);
534 bt_set_child(bt, n, 0, left);
535 bt_set_child(bt, n, 1, right);
536 bt_set_element(bt, n, 0, element);
537 bt->root = bt_write_unlock(bt, n);
538 bt->depth++;
539}
540
541/*
542 * Discard the root node of a tree, and enshrine a new node as the
543 * root. Expects to be passed a write-locked nodeptr to the old
544 * root.
545 */
546static void bt_shift_root(btree *bt, nodeptr n, node_addr na)
547{
548 bt_destroy_node(bt, n);
549 bt->root = na;
550 bt->depth--;
551}
552
553/*
554 * Given a numeric index within a node, find which subtree we would
555 * descend to in order to find that index.
556 *
557 * Updates `pos' to give the numeric index within the subtree
558 * found. Also returns `ends' (if non-NULL), which has bit 0 set if
559 * the index is at the very left edge of the subtree, and/or bit 1
560 * if it's at the very right edge.
561 *
562 * Return value is the number of the subtree (0 upwards).
563 */
564#define ENDS_NONE 0
565#define ENDS_LEFT 1
566#define ENDS_RIGHT 2
567#define ENDS_BOTH 3
568static int bt_lookup_pos(btree *bt, nodeptr n, int *pos, int *ends)
569{
570 int child = 0;
571 int nchildren = bt_subtrees(bt, n);
572
573 while (child < nchildren) {
574 int count = bt_child_count(bt, n, child);
575 if (*pos <= count) {
576 if (ends) {
577 *ends = 0;
578 if (*pos == count)
579 *ends |= ENDS_RIGHT;
580 if (*pos == 0)
581 *ends |= ENDS_LEFT;
582 }
583 return child;
584 }
585 *pos -= count + 1; /* 1 for the separating element */
586 child++;
587 }
588 return -1; /* ran off the end; shouldn't happen */
589}
590
591/*
592 * Given an element to search for within a node, find either the
593 * element, or which subtree we would descend to to continue
594 * searching for that element.
595 *
596 * Return value is either the index of the element, or the index of
597 * the subtree (both 0 upwards). `is_elt' returns FALSE or TRUE
598 * respectively.
599 *
600 * Since this may be used by bt_find() with an alternative cmpfn_t,
601 * we always pass the input element as the first argument to cmp.
602 */
603static int bt_lookup_cmp(btree *bt, nodeptr n, bt_element_t element,
604 cmpfn_t cmp, int *is_elt)
605{
606 int mintree = 0, maxtree = bt_subtrees(bt, n)-1;
607
608 while (mintree < maxtree) {
609 int elt = (maxtree + mintree) / 2;
610 int c = cmp(bt->userstate, element, bt_element(bt, n, elt));
611
612 if (c == 0) {
613 *is_elt = TRUE;
614 return elt;
615 } else if (c < 0) {
616 /*
617 * `element' is less than element `elt'. So it can be
618 * in subtree number `elt' at the highest.
619 */
620 maxtree = elt;
621 } else { /* c > 0 */
622 /*
623 * `element' is greater than element `elt'. So it can
624 * be in subtree number (elt+1) at the lowest.
625 */
626 mintree = elt+1;
627 }
628 }
629
630 /*
631 * If we reach here without returning, we must have narrowed
632 * our search to the point where mintree = maxtree. So the
633 * element is not in the node itself and we know which subtree
634 * to search next.
635 */
636 assert(mintree == maxtree);
637 *is_elt = FALSE;
638 return mintree;
639}
640
641/*
642 * Generic transformations on B-tree nodes.
643 *
644 * This function divides essentially into an input side and an
645 * output side. The input side accumulates a list of items
646 * node,element,node,element,...,element,node; the output side
647 * writes those items into either one or two nodes.
648 *
649 * `intype' can be:
650 *
651 * - NODE_AS_IS. The input list is the contents of in1, followed
652 * by inelt, followed by the contents of in2. The `extra'
653 * parameters are unused, as is `inaux'.
654 *
655 * - NODE_ADD_ELT. `in2' is unused. The input list is the contents
656 * of `in1', but with subtree pointer number `inaux' replaced by
657 * extra1/inelt/extra2.
658 *
659 * - NODE_DEL_ELT. `in2' and `inelt' are unused, as is `extra2'.
660 * The input list is the contents of `in1', but with element
661 * pointer number `inaux' and its surrounding two subtrees
662 * replaced by extra1.
663 *
664 * Having obtained the input list, it is then written to one or two
665 * output nodes. If `splitpos' is NODE_JOIN, everything is written
666 * into one output node `out1'. Otherwise, `splitpos' is treated as
667 * an element index within the input list; that element is returned
668 * in `outelt', and the contents of the list is divided there and
669 * returned in nodes `out1' and `out2'.
670 *
671 * This function will re-use nodes in the `obvious' order. If two
672 * nodes are passed in and two nodes are output, they'll be the
673 * same nodes; if one node is passed in and one node output, it
674 * will be the same node too. If two are passed in and only one
675 * output, the first one will be used and the second destroyed; if
676 * one node is passed in and two are output, the one passed in will
677 * be the first of those returned, and the second will be new.
678 */
679#define NODE_AS_IS 1
680#define NODE_ADD_ELT 2
681#define NODE_DEL_ELT 3
682#define NODE_JOIN -1
683static void bt_xform(btree *bt, int intype, int inaux,
684 nodeptr in1, nodeptr in2, bt_element_t inelt,
685 node_addr extra1, node_addr extra2,
686 int splitpos, nodeptr *out1, nodeptr *out2,
687 bt_element_t *outelt)
688{
689 node_addr *nodes;
690 bt_element_t *elements;
691 nodeptr ret1, ret2;
692 int n1, n2, off2, i, j;
693
694 nodes = inewn(node_addr, 2 * bt_max_subtrees(bt));
695 elements = inewn(bt_element_t, 2 * bt_max_subtrees(bt));
696
697 /*
698 * Accumulate the input list.
699 */
700 switch(intype) {
701 case NODE_AS_IS:
702 n1 = bt_subtrees(bt, in1);
703 n2 = bt_subtrees(bt, in2);
704 off2 = 0;
705 break;
706 case NODE_ADD_ELT:
707 in2 = in1;
708 n1 = inaux+1;
709 n2 = bt_subtrees(bt, in1) - inaux;
710 off2 = inaux;
711 break;
712 case NODE_DEL_ELT:
713 in2 = in1;
714 n1 = inaux+1;
715 n2 = bt_subtrees(bt, in1) - inaux - 1;
716 off2 = inaux+1;
717 break;
718 }
719 i = j = 0;
720 while (j < n1) {
721 nodes[i] = bt_child(bt, in1, j);
722 if (j+1 < n1)
723 elements[i] = bt_element(bt, in1, j);
724 i++, j++;
725 }
726 if (intype == NODE_DEL_ELT) {
727 i--;
728 }
729 j = 0;
730 while (j < n2) {
731 nodes[i] = bt_child(bt, in2, off2+j);
732 if (j+1 < n2)
733 elements[i] = bt_element(bt, in2, off2+j);
734 i++, j++;
735 }
736 switch (intype) {
737 case NODE_AS_IS:
738 elements[n1-1] = inelt;
739 break;
740 case NODE_ADD_ELT:
741 nodes[n1-1] = extra1;
742 nodes[n1] = extra2;
743 elements[n1-1] = inelt;
744 break;
745 case NODE_DEL_ELT:
746 nodes[n1-1] = extra1;
747 break;
748 }
749
750 /*
751 * Now determine how many subtrees go in each output node, and
752 * actually create the nodes to be returned.
753 */
754 if (splitpos != NODE_JOIN) {
755 n1 = splitpos+1, n2 = i - splitpos - 1;
756 if (outelt)
757 *outelt = elements[splitpos];
758 } else {
759 n1 = i, n2 = 0;
760 }
761
762 ret1 = bt_reuse_node(bt, in1, n1);
763 if (intype == NODE_AS_IS && in2) {
764 /* We have a second input node. */
765 if (n2)
766 ret2 = bt_reuse_node(bt, in2, n2);
767 else
768 bt_destroy_node(bt, in2);
769 } else {
770 /* We have no second input node. */
771 if (n2)
772 ret2 = bt_new_node(bt, n2);
773 else
774 ret2 = NULL;
775 }
776
777 if (out1) *out1 = ret1;
778 if (out2) *out2 = ret2;
779
780 for (i = 0; i < n1; i++) {
781 bt_set_child(bt, ret1, i, nodes[i]);
782 if (i+1 < n1)
783 bt_set_element(bt, ret1, i, elements[i]);
784 }
785 if (n2) {
786 if (outelt) *outelt = elements[n1-1];
787 for (i = 0; i < n2; i++) {
788 bt_set_child(bt, ret2, i, nodes[n1+i]);
789 if (i+1 < n2)
790 bt_set_element(bt, ret2, i, elements[n1+i]);
791 }
792 }
793
794 ifree(nodes);
795 ifree(elements);
796}
797
798/*
799 * Fiddly little compare functions for use in special cases of
800 * findrelpos. One always returns +1 (a > b), the other always
801 * returns -1 (a < b).
802 */
803static int bt_cmp_greater(void *state,
804 const bt_element_t a, const bt_element_t b)
805{
806 return +1;
807}
808static int bt_cmp_less(void *state,
809 const bt_element_t a, const bt_element_t b)
810{
811 return -1;
812}
813
814/* ----------------------------------------------------------------------
815 * User-visible administration routines.
816 */
817
818btree *bt_new(cmpfn_t cmp, copyfn_t copy, freefn_t freeelt,
819 int propsize, int propalign, propmakefn_t propmake,
820 propmergefn_t propmerge, void *state, int mindegree)
821{
822 btree *ret;
823
824 ret = new1(btree);
825 ret->mindegree = mindegree;
826 ret->maxdegree = 2*mindegree;
827 ret->depth = 0; /* not even a root right now */
828 ret->root = NODE_ADDR_NULL;
829 ret->cmp = cmp;
830 ret->copy = copy;
831 ret->freeelt = freeelt;
832 ret->propsize = propsize;
833 ret->propalign = propalign;
834 ret->propoffset = sizeof(nodecomponent) * (ret->maxdegree*2 + 2);
835 if (propalign > 0) {
836 ret->propoffset += propalign - 1;
837 ret->propoffset -= ret->propoffset % propalign;
838 }
839 ret->propmake = propmake;
840 ret->propmerge = propmerge;
841 ret->userstate = state;
842
843 return ret;
844}
845
846static void bt_free_node(btree *bt, nodeptr n)
847{
848 int i;
849
850 for (i = 0; i < bt_subtrees(bt, n); i++) {
851 node_addr na;
852 nodeptr n2;
853
854 na = bt_child(bt, n, i);
855 if (!bt_unref_node(bt, na)) {
856 n2 = bt_write_lock_child(bt, n, i);
857 bt_free_node(bt, n2);
858 }
859 }
860
861 if (bt->freeelt) {
862 for (i = 0; i < bt_subtrees(bt, n)-1; i++)
863 bt->freeelt(bt->userstate, bt_element(bt, n, i));
864 }
865
866 bt_destroy_node(bt, n);
867}
868
869void bt_free(btree *bt)
870{
871 nodeptr n;
872
873 if (!bt_unref_node(bt, bt->root)) {
874 n = bt_write_lock_root(bt);
875 bt_free_node(bt, n);
876 }
877
878 sfree(bt);
879}
880
881btree *bt_clone(btree *bt)
882{
883 btree *bt2;
884
885 bt2 = bt_new(bt->cmp, bt->copy, bt->freeelt, bt->propsize, bt->propalign,
886 bt->propmake, bt->propmerge, bt->userstate, bt->mindegree);
887 bt2->depth = bt->depth;
888 bt2->root = bt_ref_node(bt, bt->root);
889 return bt2;
890}
891
892/*
893 * Nice simple function to count the size of a tree.
894 */
895int bt_count(btree *bt)
896{
897 int count;
898 nodeptr n;
899
900 n = bt_read_lock_root(bt);
901 if (n) {
902 count = bt_node_count(bt, n);
903 bt_read_unlock(bt, n);
904 return count;
905 } else {
906 return 0;
907 }
908}
909
910/* ----------------------------------------------------------------------
911 * Actual B-tree algorithms.
912 */
913
914/*
915 * Find an element by numeric index. bt_index_w is the same, but
916 * works with write locks instead of read locks, so it guarantees
917 * to return an element with only one reference to it. (You'd use
918 * this if you were using tree cloning, and wanted to modify the
919 * element once you'd found it.)
920 */
921bt_element_t bt_index(btree *bt, int index)
922{
923 nodeptr n, n2;
924 int child, ends;
925
926 n = bt_read_lock_root(bt);
927
928 if (index < 0 || index >= bt_node_count(bt, n)) {
929 bt_read_unlock(bt, n);
930 return NULL;
931 }
932
933 while (1) {
934 child = bt_lookup_pos(bt, n, &index, &ends);
935 if (ends & ENDS_RIGHT) {
936 bt_element_t ret = bt_element(bt, n, child);
937 bt_read_unlock(bt, n);
938 return ret;
939 }
940 n2 = bt_read_lock_child(bt, n, child);
941 bt_read_unlock(bt, n);
942 n = n2;
943 assert(n != NULL);
944 }
945}
946
947bt_element_t bt_index_w(btree *bt, int index)
948{
949 nodeptr n, n2;
950 int nnodes, child, ends;
951 nodeptr *nodes;
952 bt_element_t ret;
953
954 nodes = inewn(nodeptr, bt->depth+1);
955 nnodes = 0;
956
957 n = bt_write_lock_root(bt);
958
959 if (index < 0 || index >= bt_node_count(bt, n)) {
960 bt_write_unlock(bt, n);
961 return NULL;
962 }
963
964 while (1) {
965 nodes[nnodes++] = n;
966 child = bt_lookup_pos(bt, n, &index, &ends);
967 if (ends & ENDS_RIGHT) {
968 ret = bt_element(bt, n, child);
969 break;
970 }
971 n2 = bt_write_lock_child(bt, n, child);
972 n = n2;
973 assert(n != NULL);
974 }
975
976 while (nnodes-- > 0)
977 bt_write_unlock(bt, nodes[nnodes]);
978
979 return ret;
980}
981
982/*
983 * Search for an element by sorted order.
984 */
985bt_element_t bt_findrelpos(btree *bt, bt_element_t element, cmpfn_t cmp,
986 int relation, int *index)
987{
988 nodeptr n, n2;
989 int child, is_elt;
990 bt_element_t gotit;
991 int pos = 0;
e7f01466 992
993 if (!cmp) cmp = bt->cmp;
994
995 /*
996 * Special case: relation LT/GT and element NULL means get an
997 * extreme element of the tree. We do this by fudging the
998 * compare function so that our NULL element will be considered
999 * infinitely large or infinitely small.
1000 */
1001 if (element == NULL) {
1002 assert(relation == BT_REL_LT || relation == BT_REL_GT);
1003 if (relation == BT_REL_LT)
1004 cmp = bt_cmp_greater; /* always returns a > b */
1005 else
1006 cmp = bt_cmp_less; /* always returns a < b */
1007 }
1008
1009 gotit = NULL;
1010 n = bt_read_lock_root(bt);
1011 if (!n)
1012 return NULL;
e7f01466 1013 while (n) {
1014 child = bt_lookup_cmp(bt, n, element, cmp, &is_elt);
1015 if (is_elt) {
1016 pos += bt_child_startpos(bt, n, child+1) - 1;
1017 gotit = bt_element(bt, n, child);
1018 bt_read_unlock(bt, n);
1019 break;
1020 } else {
1021 pos += bt_child_startpos(bt, n, child);
1022 n2 = bt_read_lock_child(bt, n, child);
1023 bt_read_unlock(bt, n);
1024 n = n2;
1025 }
1026 }
1027
1028 /*
1029 * Now all nodes are unlocked, and we are _either_ (a) holding
1030 * an element in `gotit' whose index we have in `pos', _or_ (b)
1031 * holding nothing in `gotit' but we know the index of the
1032 * next-higher element.
1033 */
1034 if (gotit) {
1035 /*
1036 * We have the real element. For EQ, LE and GE relations we
1037 * can now just return it; otherwise we must return the
1038 * next element down or up.
1039 */
1040 if (relation == BT_REL_LT)
1041 gotit = bt_index(bt, --pos);
1042 else if (relation == BT_REL_GT)
1043 gotit = bt_index(bt, ++pos);
1044 } else {
1045 /*
1046 * We don't have the real element. For EQ relation we now
1047 * just give up; for everything else we return the next
1048 * element down or up.
1049 */
1050 if (relation == BT_REL_LT || relation == BT_REL_LE)
1051 gotit = bt_index(bt, --pos);
1052 else if (relation == BT_REL_GT || relation == BT_REL_GE)
1053 gotit = bt_index(bt, pos);
1054 }
1055
1056 if (gotit && index) *index = pos;
1057 return gotit;
1058}
1059bt_element_t bt_findrel(btree *bt, bt_element_t element, cmpfn_t cmp,
1060 int relation)
1061{
1062 return bt_findrelpos(bt, element, cmp, relation, NULL);
1063}
1064bt_element_t bt_findpos(btree *bt, bt_element_t element, cmpfn_t cmp,
1065 int *index)
1066{
1067 return bt_findrelpos(bt, element, cmp, BT_REL_EQ, index);
1068}
1069bt_element_t bt_find(btree *bt, bt_element_t element, cmpfn_t cmp)
1070{
1071 return bt_findrelpos(bt, element, cmp, BT_REL_EQ, NULL);
1072}
1073
1074/*
1075 * Find an element by property-based search. Returns the element
1076 * (if one is selected - the search can also terminate by
1077 * descending to a nonexistent subtree of a leaf node, equivalent
1078 * to selecting the _gap_ between two elements); also returns the
1079 * index of either the element or the gap in `*index' if `index' is
1080 * non-NULL.
1081 */
1082bt_element_t bt_propfind(btree *bt, searchfn_t search, void *sstate,
1083 int *index)
1084{
1085 nodeptr n, n2;
1086 int i, j, count, is_elt;
1087 void **props;
1088 int *counts;
1089 bt_element_t *elts;
1090 bt_element_t *e = NULL;
1091
1092 props = inewn(void *, bt->maxdegree);
1093 counts = inewn(int, bt->maxdegree);
1094 elts = inewn(bt_element_t, bt->maxdegree);
1095
1096 n = bt_read_lock_root(bt);
1097
1098 count = 0;
1099
1100 while (n) {
1101 int ntrees = bt_subtrees(bt, n);
1102
1103 /*
1104 * Prepare the arguments to the search function.
1105 */
1106 for (i = 0; i < ntrees; i++) {
1107 props[i] = bt_child_prop(bt, n, i);
1108 counts[i] = bt_child_count(bt, n, i);
1109 if (i < ntrees-1)
1110 elts[i] = bt_element(bt, n, i);
1111 }
1112
1113 /*
1114 * Call the search function.
1115 */
1116 i = search(bt->userstate, sstate, ntrees,
1117 props, counts, elts, &is_elt);
1118
1119 if (!is_elt) {
1120 /*
1121 * Descend to subtree i. Update `count' to consider
1122 * everything (both subtrees and elements) before that
1123 * subtree.
1124 */
1125 for (j = 0; j < i; j++)
1126 count += 1 + bt_child_count(bt, n, j);
1127 n2 = bt_read_lock_child(bt, n, i);
1128 bt_read_unlock(bt, n);
1129 n = n2;
1130 } else {
1131 /*
1132 * Return element i. Update `count' to consider
1133 * everything (both subtrees and elements) before that
1134 * element.
1135 */
1136 for (j = 0; j <= i; j++)
1137 count += 1 + bt_child_count(bt, n, j);
1138 count--; /* don't count element i itself */
1139 e = bt_element(bt, n, i);
1140 bt_read_unlock(bt, n);
1141 break;
1142 }
1143 }
1144
1145 ifree(props);
1146 ifree(counts);
1147 ifree(elts);
1148
1149 if (index) *index = count;
1150 return e;
1151}
1152
1153/*
1154 * Replace the element at a numeric index by a new element. Returns
1155 * the old element.
1156 *
1157 * Can also be used when the new element is the _same_ as the old
1158 * element, but has changed in some way that will affect user
1159 * properties.
1160 */
1161bt_element_t bt_replace(btree *bt, bt_element_t element, int index)
1162{
1163 nodeptr n;
1164 nodeptr *nodes;
1165 bt_element_t ret;
1166 int nnodes, child, ends;
1167
1168 nodes = inewn(nodeptr, bt->depth+1);
1169 nnodes = 0;
1170
1171 n = bt_write_lock_root(bt);
1172
1173 if (index < 0 || index >= bt_node_count(bt, n)) {
1174 bt_write_unlock(bt, n);
1175 return NULL;
1176 }
1177
1178 while (1) {
1179 nodes[nnodes++] = n;
1180 child = bt_lookup_pos(bt, n, &index, &ends);
1181 if (ends & ENDS_RIGHT) {
1182 ret = bt_element(bt, n, child);
1183 bt_set_element(bt, n, child, element);
1184 break;
1185 }
1186 n = bt_write_lock_child(bt, n, child);
1187 assert(n != NULL);
1188 }
1189
1190 while (nnodes-- > 0)
1191 bt_write_unlock(bt, nodes[nnodes]);
1192
1193 return ret;
1194}
1195
1196/*
1197 * Add at a specific position. As we search down the tree we must
1198 * write-lock every node we meet, since otherwise we might fail to
1199 * clone nodes that will end up pointing to different things.
1200 */
1201void bt_addpos(btree *bt, bt_element_t element, int pos)
1202{
1203 nodeptr n;
1204 node_addr left, right, single;
1205 nodeptr *nodes;
1206 int *childposns;
1207 int nnodes, child;
1208
1209 /*
1210 * Since in a reference-counted tree we can't have parent
1211 * links, we will have to use O(depth) space to store the list
1212 * of nodeptrs we have gone through, so we can un-write-lock
1213 * them when we've finished. We also store the subtree index we
1214 * descended to at each stage.
1215 */
1216 nodes = inewn(nodeptr, bt->depth+1);
1217 childposns = inewn(int, bt->depth+1);
1218 nnodes = 0;
1219
1220 n = bt_write_lock_root(bt);
1221
1222 assert(pos >= 0 && pos <= (n ? bt_node_count(bt, n) : 0));
1223
1224 /*
1225 * Scan down the tree, write-locking nodes, until we find the
1226 * empty subtree where we want to insert the item.
1227 */
1228 while (n) {
1229 nodes[nnodes] = n;
1230 child = bt_lookup_pos(bt, n, &pos, NULL);
1231 childposns[nnodes] = child;
1232 nnodes++;
1233 n = bt_write_lock_child(bt, n, child);
1234 }
1235
1236 left = right = NODE_ADDR_NULL;
1237
1238 /*
1239 * Now nodes[nnodes-1] wants to have subtree index
1240 * childposns[nnodes-1] replaced by the node/element/node triple
1241 * (left,element,right). Propagate this up the tree until we
1242 * can stop.
1243 */
1244 while (nnodes-- > 0) {
1245 n = nodes[nnodes];
1246 if (bt_subtrees(bt, n) == bt_max_subtrees(bt)) {
1247 nodeptr lptr, rptr;
1248 /* Split the node and carry on up. */
1249 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1250 n, NULL, element, left, right,
1251 bt_min_subtrees(bt), &lptr, &rptr, &element);
1252 left = bt_write_unlock(bt, lptr);
1253 right = bt_write_unlock(bt, rptr);
1254 } else {
1255 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1256 n, NULL, element, left, right,
1257 NODE_JOIN, &n, NULL, NULL);
1258 single = bt_write_unlock(bt, n);
1259 break;
1260 }
1261 }
1262
1263 /*
1264 * If nnodes < 0, we have just split the root and we need to
1265 * build a new root node.
1266 */
1267 if (nnodes < 0) {
1268 bt_new_root(bt, left, right, element);
1269 } else {
1270 /*
1271 * Now nodes[nnodes-1] just wants to have child pointer
1272 * child[nnodes-1] replaced by `single', in case the
1273 * subtree was moved. Propagate this back up to the root,
1274 * unlocking all nodes.
1275 */
1276 while (nnodes-- > 0) {
1277 bt_set_child(bt, nodes[nnodes], childposns[nnodes], single);
1278 single = bt_write_unlock(bt, nodes[nnodes]);
1279 }
1280 }
1281
1282 ifree(nodes);
1283 ifree(childposns);
1284}
1285
1286/*
1287 * Add an element in sorted order. This is a wrapper on bt_addpos()
1288 * which finds the numeric index to add the item at and then calls
1289 * addpos. This isn't an optimal use of time, but it saves space by
1290 * avoiding starting to clone multiply-linked nodes until it's
1291 * known that the item _can_ be added to the tree (and isn't
1292 * duplicated in it already).
1293 */
1294bt_element_t bt_add(btree *bt, bt_element_t element)
1295{
1296 nodeptr n, n2;
1297 int child, is_elt;
1298 int pos = 0;
1299
1300 n = bt_read_lock_root(bt);
1301 while (n) {
1302 child = bt_lookup_cmp(bt, n, element, bt->cmp, &is_elt);
1303 if (is_elt) {
1304 bt_read_unlock(bt, n);
1305 return bt_element(bt, n, child); /* element exists already */
1306 } else {
1307 pos += bt_child_startpos(bt, n, child);
1308 n2 = bt_read_lock_child(bt, n, child);
1309 bt_read_unlock(bt, n);
1310 n = n2;
1311 }
1312 }
1313 bt_addpos(bt, element, pos);
1314 return element;
1315}
1316
1317/*
1318 * Delete an element given its numeric position. Returns the
1319 * element deleted.
1320 */
1321bt_element_t bt_delpos(btree *bt, int pos)
1322{
1323 nodeptr n, c, c2, saved_n;
1324 nodeptr *nodes;
1325 int nnodes, child, nroot, pos2, ends, st, splitpoint, saved_pos;
1326 bt_element_t e, ret;
1327
1328 /*
1329 * Just like in bt_add, we store the set of nodeptrs we
1330 * write-locked on the way down, so we can unlock them on the
1331 * way back up.
1332 */
1333 nodes = inewn(nodeptr, bt->depth+1);
1334 nnodes = 0;
1335
1336 n = bt_write_lock_root(bt);
1337 nroot = TRUE;
1338 saved_n = NULL;
1339
1340 if (!n || pos < 0 || pos >= bt_node_count(bt, n)) {
1341 if (n)
1342 bt_write_unlock(bt, n);
1343 return NULL;
1344 }
1345
1346 while (1) {
1347 nodes[nnodes++] = n;
1348
1349 /*
1350 * Find out which subtree to descend to.
1351 */
1352 pos2 = pos;
1353 child = bt_lookup_pos(bt, n, &pos, &ends);
1354 c = bt_write_lock_child(bt, n, child);
1355 if (c && bt_subtrees(bt, c) == bt_min_subtrees(bt)) {
1356 /*
1357 * We're trying to descend to a subtree that's of
1358 * minimum size. Do something!
1359 */
1360 if (child > 0) {
1361 /*
1362 * Either move a subtree from the left sibling, or
1363 * merge with it. (Traditionally we would only
1364 * merge if we can't move a subtree from _either_
1365 * sibling, but this way avoids too many extra
1366 * write locks.)
1367 */
1368 c2 = c;
1369 c = bt_write_lock_child(bt, n, child-1);
1370 e = bt_element(bt, n, child-1);
1371 st = bt_subtrees(bt, c);
1372 if (st > bt_min_subtrees(bt))
1373 splitpoint = st - 2;
1374 else
1375 splitpoint = NODE_JOIN;
1376 child--;
1377 } else {
1378 /*
1379 * Likewise on the right-hand side.
1380 */
1381 c2 = bt_write_lock_child(bt, n, child+1);
1382 e = bt_element(bt, n, child);
1383 st = bt_subtrees(bt, c2);
1384 if (st > bt_min_subtrees(bt))
1385 splitpoint = bt_min_subtrees(bt);
1386 else
1387 splitpoint = NODE_JOIN;
1388 }
1389
1390 if (splitpoint == NODE_JOIN) {
1391 /*
1392 * So if we're merging nodes, go to it...
1393 */
1394 bt_xform(bt, NODE_AS_IS, 0,
1395 c, c2, e, NODE_ADDR_NULL, NODE_ADDR_NULL,
1396 NODE_JOIN, &c, NULL, NULL);
1397 bt_xform(bt, NODE_DEL_ELT, child,
1398 n, NULL, NULL, bt_node_addr(bt, c), NODE_ADDR_NULL,
1399 NODE_JOIN, &n, NULL, NULL);
1400 if (nroot && bt_subtrees(bt, n) == 1) {
1401 /*
1402 * Whoops, we just merged the last two children
1403 * of the root. Better relocate the root.
1404 */
1405 bt_shift_root(bt, n, bt_node_addr(bt, c));
1406 nnodes--; /* don't leave it in nodes[]! */
1407 n = NULL;
1408 bt_write_relock(bt, c, TRUE);
1409 } else
1410 bt_write_unlock(bt, c);
1411 } else {
1412 /*
1413 * Or if we're redistributing subtrees, go to that.
1414 */
1415 bt_xform(bt, NODE_AS_IS, 0,
1416 c, c2, e, NODE_ADDR_NULL, NODE_ADDR_NULL,
1417 splitpoint, &c, &c2, &e);
1418 bt_set_element(bt, n, child, e);
1419 bt_write_unlock(bt, c);
1420 bt_write_unlock(bt, c2);
1421 }
1422
1423 if (n) {
1424 /* Recompute the counts in n so we can do lookups again. */
1425 bt_write_relock(bt, n, TRUE);
1426
1427 /* Having done the transform, redo the position lookup. */
1428 pos = pos2;
1429 child = bt_lookup_pos(bt, n, &pos, &ends);
1430 c = bt_write_lock_child(bt, n, child);
1431 } else {
1432 pos = pos2;
1433 }
1434 }
1435
1436 /*
1437 * Now see if this node contains the element we're
1438 * looking for.
1439 */
1440 if (n && (ends & ENDS_RIGHT)) {
1441 /*
1442 * It does. Element number `child' is the element we
1443 * want to delete. See if this is a leaf node...
1444 */
1445 if (!bt_is_leaf(bt, n)) {
1446 /*
1447 * It's not a leaf node. So we save the nodeptr and
1448 * element index for later reference, and decrement
1449 * `pos' so that we're searching for the element to its
1450 * left, which _will_ be in a leaf node.
1451 */
1452 saved_n = n;
1453 saved_pos = child;
1454 pos--;
1455 } else {
1456 /*
1457 * We've reached a leaf node. Check to see if an
1458 * internal-node position was stored in saved_n and
1459 * saved_pos, and move this element there if so.
1460 */
1461 if (saved_n) {
1462 ret = bt_element(bt, saved_n, saved_pos);
1463 bt_set_element(bt, saved_n, saved_pos,
1464 bt_element(bt, n, child));
1465 } else {
1466 ret = bt_element(bt, n, child);
1467 }
1468 /* Then delete it from the leaf node. */
1469 bt_xform(bt, NODE_DEL_ELT, child,
1470 n, NULL, NULL, NODE_ADDR_NULL, NODE_ADDR_NULL,
1471 NODE_JOIN, &n, NULL, NULL);
1472 /*
1473 * Final special case: if this is the root node and
1474 * we've just deleted its last element, we should
1475 * destroy it and leave a completely empty tree.
1476 */
1477 if (nroot && bt_subtrees(bt, n) == 1) {
1478 bt_shift_root(bt, n, NODE_ADDR_NULL);
1479 nnodes--; /* and take it out of nodes[] */
1480 }
1481 /* Now we're done */
1482 break;
1483 }
1484 }
1485
1486 /* Descend to the child and go round again. */
1487 n = c;
1488 nroot = FALSE;
1489 }
1490
1491 /*
1492 * All done. Zip back up the tree un-write-locking nodes.
1493 */
1494 while (nnodes-- > 0)
1495 bt_write_unlock(bt, nodes[nnodes]);
1496
1497 ifree(nodes);
1498
1499 return ret;
1500}
1501
1502/*
1503 * Delete an element in sorted order.
1504 */
1505bt_element_t bt_del(btree *bt, bt_element_t element)
1506{
1507 int index;
1508 if (!bt_findrelpos(bt, element, NULL, BT_REL_EQ, &index))
1509 return NULL; /* wasn't there */
1510 return bt_delpos(bt, index);
1511}
1512
1513/*
1514 * Join two trees together, given their respective depths and a
1515 * middle element. Puts the resulting tree in the root of `bt'.
1516 *
1517 * This internal routine assumes that the trees have the same
1518 * degree.
1519 *
1520 * The input nodeptrs are assumed to be write-locked, but none of
1521 * their children are yet write-locked.
1522 */
1523static void bt_join_internal(btree *bt, nodeptr lp, nodeptr rp,
1524 bt_element_t sep, int ld, int rd)
1525{
1526 nodeptr *nodes;
1527 int *childposns;
1528 int nnodes, nodessize;
1529 int lsub, rsub;
1530
1531 /*
1532 * We will need to store parent nodes up to the difference
1533 * between ld and rd.
1534 */
1535 nodessize = (ld < rd ? rd-ld : ld-rd);
1536 if (nodessize) { /* we may not need _any_! */
1537 nodes = inewn(nodeptr, nodessize);
1538 childposns = inewn(int, nodessize);
1539 }
1540 nnodes = 0;
1541
1542 if (ld > rd) {
1543 bt->root = bt_node_addr(bt, lp);
1544 bt->depth = ld;
1545 /* If the left tree is taller, search down its right-hand edge. */
1546 while (ld > rd) {
1547 int child = bt_subtrees(bt, lp) - 1;
1548 nodeptr n = bt_write_lock_child(bt, lp, child);
1549 nodes[nnodes] = lp;
1550 childposns[nnodes] = child;
1551 nnodes++;
1552 lp = n;
1553 ld--;
1554 }
1555 } else {
1556 bt->root = bt_node_addr(bt, rp);
1557 bt->depth = rd;
1558 /* If the right tree is taller, search down its left-hand edge. */
1559 while (rd > ld) {
1560 nodeptr n = bt_write_lock_child(bt, rp, 0);
1561 nodes[nnodes] = rp;
1562 childposns[nnodes] = 0;
1563 nnodes++;
1564 rp = n;
1565 rd--;
1566 }
1567 }
1568
1569 /*
1570 * So we now want to combine nodes lp and rp into either one or
1571 * two plausibly-sized nodes, whichever is feasible. We have a
1572 * joining element `sep'.
1573 */
1574 lsub = (lp ? bt_subtrees(bt, lp) : 0);
1575 rsub = (rp ? bt_subtrees(bt, rp) : 0);
1576 if (lp && rp && lsub + rsub <= bt_max_subtrees(bt)) {
1577 node_addr la;
1578 /* Join the nodes into one. */
1579 bt_xform(bt, NODE_AS_IS, 0, lp, rp, sep,
1580 NODE_ADDR_NULL, NODE_ADDR_NULL,
1581 NODE_JOIN, &lp, NULL, NULL);
1582 /* Unlock the node. */
1583 la = bt_write_unlock(bt, lp);
1584 /* Update the child pointer in the next node up. */
1585 if (nnodes > 0)
1586 bt_set_child(bt, nodes[nnodes-1], childposns[nnodes-1], la);
1587 else
1588 bt->root = la;
1589 } else {
1590 node_addr la, ra;
1591 if (!lp || !rp) {
1592 la = NODE_ADDR_NULL;
1593 ra = NODE_ADDR_NULL;
1594 } else {
1595 int lsize, rsize;
1596 /* Re-split the nodes into two plausibly sized ones. */
1597 lsize = lsub + rsub;
1598 rsize = lsize / 2;
1599 lsize -= rsize;
1600 bt_xform(bt, NODE_AS_IS, 0, lp, rp, sep,
1601 NODE_ADDR_NULL, NODE_ADDR_NULL,
1602 lsize-1, &lp, &rp, &sep);
1603 /* Unlock the nodes. */
1604 la = bt_write_unlock(bt, lp);
1605 ra = bt_write_unlock(bt, rp);
1606 }
1607
1608 /*
1609 * Now we have to do the addition thing: progress up the
1610 * tree replacing a single subtree pointer with the
1611 * la/sep/ra assembly, until no more nodes have to split as
1612 * a result.
1613 */
1614 while (nnodes-- > 0) {
1615 nodeptr n = nodes[nnodes];
1616 if (bt_subtrees(bt, n) == bt_max_subtrees(bt)) {
1617 /* Split the node and carry on up. */
1618 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1619 n, NULL, sep, la, ra,
1620 bt_min_subtrees(bt), &lp, &rp, &sep);
1621 la = bt_write_unlock(bt, lp);
1622 ra = bt_write_unlock(bt, rp);
1623 } else {
1624 bt_xform(bt, NODE_ADD_ELT, childposns[nnodes],
1625 n, NULL, sep, la, ra,
1626 NODE_JOIN, &n, NULL, NULL);
1627 bt_write_unlock(bt, n);
1628 break;
1629 }
1630 }
1631
1632 /*
1633 * If nnodes < 0, we have just split the root and we need
1634 * to build a new root node.
1635 */
1636 if (nnodes < 0)
1637 bt_new_root(bt, la, ra, sep);
1638 }
1639
1640 /*
1641 * Now we just need to go back up and unlock any remaining
1642 * nodes. Also here we ensure the root points where it should.
1643 */
1644 while (nnodes-- > 0) {
1645 node_addr na;
1646 na = bt_write_unlock(bt, nodes[nnodes]);
1647 if (nnodes == 0)
1648 bt->root = na;
1649 }
1650
1651 if (nodessize) {
1652 ifree(nodes);
1653 ifree(childposns);
1654 }
1655}
1656
1657/*
1658 * External interfaces to the join functionality: join and joinr
1659 * (differing only in which B-tree structure they leave without any
1660 * elements, and which they return the combined tree in).
1661 */
1662btree *bt_join(btree *bt1, btree *bt2)
1663{
1664 nodeptr root1, root2;
1665 int size2;
1666
1667 size2 = bt_count(bt2);
1668 if (size2 > 0) {
1669 bt_element_t sep;
1670
1671 if (bt1->cmp) {
1672 /*
1673 * The trees are ordered, so verify the ordering
1674 * condition: ensure nothing in bt1 is greater than or
1675 * equal to the minimum element in bt2.
1676 */
1677 sep = bt_index(bt2, 0);
1678 sep = bt_findrelpos(bt1, sep, NULL, BT_REL_GE, NULL);
1679 if (sep)
1680 return NULL;
1681 }
1682
1683 sep = bt_delpos(bt2, 0);
1684 root1 = bt_write_lock_root(bt1);
1685 root2 = bt_write_lock_root(bt2);
1686 bt_join_internal(bt1, root1, root2, sep, bt1->depth, bt2->depth);
1687 bt2->root = NODE_ADDR_NULL;
1688 bt2->depth = 0;
1689 }
1690 return bt1;
1691}
1692
1693btree *bt_joinr(btree *bt1, btree *bt2)
1694{
1695 nodeptr root1, root2;
1696 int size1;
1697
1698 size1 = bt_count(bt1);
1699 if (size1 > 0) {
1700 bt_element_t sep;
1701
1702 if (bt2->cmp) {
1703 /*
1704 * The trees are ordered, so verify the ordering
1705 * condition: ensure nothing in bt2 is less than or
1706 * equal to the maximum element in bt1.
1707 */
1708 sep = bt_index(bt1, size1-1);
1709 sep = bt_findrelpos(bt2, sep, NULL, BT_REL_LE, NULL);
1710 if (sep)
1711 return NULL;
1712 }
1713
1714 sep = bt_delpos(bt1, size1-1);
1715 root1 = bt_write_lock_root(bt1);
1716 root2 = bt_write_lock_root(bt2);
1717 bt_join_internal(bt2, root1, root2, sep, bt1->depth, bt2->depth);
1718 bt1->root = NODE_ADDR_NULL;
1719 bt1->depth = 0;
1720 }
1721 return bt2;
1722}
1723
1724/*
1725 * Perform the healing process after a tree has been split. `rhs'
1726 * is set if the cut edge is the one on the right.
1727 */
1728static void bt_split_heal(btree *bt, int rhs)
1729{
1730 nodeptr n;
1731 nodeptr *nodes;
1732 int nnodes;
1733
1734 nodes = inewn(nodeptr, bt->depth);
1735 nnodes = 0;
1736
1737 n = bt_write_lock_root(bt);
1738
1739 /*
1740 * First dispense with completely trivial cases: a root node
1741 * containing only one subtree can be thrown away instantly.
1742 */
1743 while (n && bt_subtrees(bt, n) == 1) {
1744 nodeptr n2 = bt_write_lock_child(bt, n, 0);
1745 bt_shift_root(bt, n, bt_node_addr(bt, n2));
1746 n = n2;
1747 }
1748
1749 /*
1750 * Now we have a plausible root node. Start going down the cut
1751 * edge looking for undersized or minimum nodes, and arranging
1752 * for them to be above minimum size.
1753 */
1754 while (n) {
1755 int edge, next, elt, size_e, size_n, size_total;
1756 nodeptr ne, nn, nl, nr;
1757 bt_element_t el;
1758
1759 nodes[nnodes++] = n;
1760
1761 if (rhs) {
1762 edge = bt_subtrees(bt, n) - 1;
1763 next = edge - 1;
1764 elt = next;
1765 } else {
1766 edge = 0;
1767 next = 1;
1768 elt = edge;
1769 }
1770
1771 ne = bt_write_lock_child(bt, n, edge);
1772 if (!ne)
1773 break;
1774
1775 size_e = bt_subtrees(bt, ne);
1776
1777 if (size_e <= bt_min_subtrees(bt)) {
1778 nn = bt_write_lock_child(bt, n, next);
1779 el = bt_element(bt, n, elt);
1780 size_n = bt_subtrees(bt, nn);
1781 if (edge < next)
1782 nl = ne, nr = nn;
1783 else
1784 nl = nn, nr = ne;
1785 size_total = size_e + size_n;
1786 if (size_e + size_n <= bt_max_subtrees(bt)) {
1787 /*
1788 * Merge the edge node and its sibling together.
1789 */
1790 bt_xform(bt, NODE_AS_IS, 0, nl, nr, el,
1791 NODE_ADDR_NULL, NODE_ADDR_NULL,
1792 NODE_JOIN, &ne, NULL, NULL);
1793 bt_xform(bt, NODE_DEL_ELT, elt, n, NULL, NULL,
1794 bt_node_addr(bt, ne), NODE_ADDR_NULL,
1795 NODE_JOIN, &n, NULL, NULL);
1796 /*
1797 * It's possible we've just trashed the root of the
1798 * tree, again.
1799 */
1800 if (bt_subtrees(bt, n) == 1) {
1801 bt_shift_root(bt, n, bt_node_addr(bt, ne));
1802 nnodes--; /* and take it out of nodes[] */
1803 }
1804 } else {
1805 /*
1806 * Redistribute subtrees between the edge node and
1807 * its sibling.
1808 */
1809 int split;
1810 size_e = (size_total + 1) / 2;
1811 assert(size_e > bt_min_subtrees(bt));
1812 if (next < edge)
1813 split = size_total - size_e - 1;
1814 else
1815 split = size_e - 1;
1816 bt_xform(bt, NODE_AS_IS, 0, nl, nr, el,
1817 NODE_ADDR_NULL, NODE_ADDR_NULL,
1818 split, &nl, &nr, &el);
1819 bt_write_unlock(bt, nn);
1820 bt_set_element(bt, n, elt, el);
1821 }
1822 }
1823
1824 n = ne;
1825 }
1826
1827 /*
1828 * Now we just need to go back up and unlock any remaining
1829 * nodes.
1830 */
1831 while (nnodes-- > 0)
1832 bt_write_unlock(bt, nodes[nnodes]);
1833
1834 ifree(nodes);
1835}
1836
1837/*
1838 * Split a tree by numeric position. The new tree returned is the
1839 * one on the right; the original tree contains the stuff on the
1840 * left.
1841 */
1842static btree *bt_split_internal(btree *bt1, int index)
1843{
1844 btree *bt2;
1845 nodeptr *lnodes, *rnodes;
1846 nodeptr n1, n2, n;
1847 int nnodes, child;
1848
1849 bt2 = bt_new(bt1->cmp, bt1->copy, bt1->freeelt, bt1->propsize,
1850 bt1->propalign, bt1->propmake, bt1->propmerge,
1851 bt1->userstate, bt1->mindegree);
1852 bt2->depth = bt1->depth;
1853
1854 lnodes = inewn(nodeptr, bt1->depth);
1855 rnodes = inewn(nodeptr, bt2->depth);
1856 nnodes = 0;
1857
1858 n1 = bt_write_lock_root(bt1);
1859 while (n1) {
1860 child = bt_lookup_pos(bt1, n1, &index, NULL);
1861 n = bt_write_lock_child(bt1, n1, child);
1862 bt_xform(bt1, NODE_ADD_ELT, child, n1, NULL, NULL,
1863 bt_node_addr(bt1, n), NODE_ADDR_NULL,
1864 child, &n1, &n2, NULL);
1865 lnodes[nnodes] = n1;
1866 rnodes[nnodes] = n2;
1867 if (nnodes > 0)
1868 bt_set_child(bt2, rnodes[nnodes-1], 0, bt_node_addr(bt2, n2));
1869 else
1870 bt2->root = bt_node_addr(bt2, n2);
1871 nnodes++;
1872 n1 = n;
1873 }
1874
1875 /*
1876 * Now we go back up and unlock all the nodes. At this point we
1877 * don't mess with user properties, because there's the danger
1878 * of a node containing no subtrees _or_ elements and hence us
1879 * having to invent a notation for an empty property. We're
1880 * going to make a second healing pass in a moment anyway,
1881 * which will sort all that out for us.
1882 */
1883 while (nnodes-- > 0) {
1884 bt_write_unlock_internal(bt1, lnodes[nnodes], FALSE);
1885 bt_write_unlock_internal(bt2, rnodes[nnodes], FALSE);
1886 }
1887
1888 /*
1889 * Then we make a healing pass down each side of the tree.
1890 */
1891 bt_split_heal(bt1, TRUE);
1892 bt_split_heal(bt2, FALSE);
1893
1894 ifree(lnodes);
1895 ifree(rnodes);
1896
1897 return bt2;
1898}
1899
1900/*
1901 * Split a tree at a numeric index.
1902 */
1903btree *bt_splitpos(btree *bt, int index, int before)
1904{
1905 btree *ret;
1906 node_addr na;
1907 int count, nd;
1908 nodeptr n;
1909
1910 n = bt_read_lock_root(bt);
1911 count = (n ? bt_node_count(bt, n) : 0);
1912 bt_read_unlock(bt, n);
1913
1914 if (index < 0 || index > count)
1915 return NULL;
1916
1917 ret = bt_split_internal(bt, index);
1918 if (before) {
1919 na = bt->root;
1920 bt->root = ret->root;
1921 ret->root = na;
1922
1923 nd = bt->depth;
1924 bt->depth = ret->depth;
1925 ret->depth = nd;
1926 }
1927 return ret;
1928}
1929
1930/*
1931 * Split a tree at a position dictated by the sorting order.
1932 */
1933btree *bt_split(btree *bt, bt_element_t element, cmpfn_t cmp, int rel)
1934{
1935 int before, index;
1936
1937 assert(rel != BT_REL_EQ); /* has to be an inequality */
1938
1939 if (rel == BT_REL_GT || rel == BT_REL_GE) {
1940 before = TRUE;
1941 rel = (rel == BT_REL_GT ? BT_REL_LE : BT_REL_LT);
1942 } else {
1943 before = FALSE;
1944 }
1945 if (!bt_findrelpos(bt, element, cmp, rel, &index))
1946 index = -1;
1947 return bt_splitpos(bt, index+1, before);
1948}
1949
1950#ifdef TEST
1951
1952#define TEST_DEGREE 4
1953#define BT_COPY bt_clone
1954#define MAXTREESIZE 10000
1955#define MAXLOCKS 100
1956
1957int errors;
1958
1959/*
1960 * Error reporting function.
1961 */
1962void error(char *fmt, ...) {
1963 va_list ap;
1964 fprintf(stderr, "ERROR: ");
1965 va_start(ap, fmt);
1966 vfprintf(stderr, fmt, ap);
1967 va_end(ap);
1968 fprintf(stderr, "\n");
1969 errors++;
1970}
1971
1972/*
1973 * See if a tree has a 2-element root node.
1974 */
1975static int bt_tworoot(btree *bt)
1976{
1977 nodeptr n;
1978 int i;
1979 n = bt_read_lock_root(bt);
1980 i = bt_subtrees(bt, n);
1981 bt_read_unlock(bt, n);
1982 return (i == 2 ? TRUE : FALSE);
1983}
1984
1985/*
1986 * Physically copy an entire B-tree. (NB this appears as a test
1987 * routine rather than a production one, since reference counting
1988 * and bt_clone() provide a better way to do this for real code. If
1989 * anyone really needs a genuine physical copy for anything other
1990 * than testing reasons, I suppose they could always lift this into
1991 * the admin section above.)
1992 */
1993
1994static nodeptr bt_copy_node(btree *bt, nodeptr n)
1995{
1996 int i, children;
1997 nodeptr ret;
1998
1999 children = bt_subtrees(bt, n);
2000 ret = bt_new_node(bt, children);
2001
2002 for (i = 0; i < children; i++) {
2003 nodeptr n2 = bt_read_lock_child(bt, n, i);
2004 nodeptr n3;
2005 if (n2) {
2006 n3 = bt_copy_node(bt, n2);
2007 bt_set_child(bt, ret, i, bt_write_unlock(bt, n3));
2008 } else {
2009 bt_set_child(bt, ret, i, NODE_ADDR_NULL);
2010 }
2011 bt_read_unlock(bt, n2);
2012
2013 if (i < children-1) {
2014 bt_element_t e = bt_element(bt, n, i);
2015 if (bt->copy)
2016 e = bt->copy(bt->userstate, e);
2017 bt_set_element(bt, ret, i, e);
2018 }
2019 }
2020
2021 return ret;
2022}
2023
2024btree *bt_copy(btree *bt)
2025{
2026 nodeptr n;
2027 btree *bt2;
2028
2029 bt2 = bt_new(bt->cmp, bt->copy, bt->freeelt, bt->propsize, bt->propalign,
2030 bt->propmake, bt->propmerge, bt->userstate, bt->mindegree);
2031 bt2->depth = bt->depth;
2032
2033 n = bt_read_lock_root(bt);
2034 if (n)
2035 bt2->root = bt_write_unlock(bt2, bt_copy_node(bt, n));
2036 bt_read_unlock(bt, n);
2037
2038 return bt2;
2039}
2040
2041/*
2042 * This function is intended to be called from gdb when debugging
2043 * things.
2044 */
2045void bt_dump_nodes(btree *bt, ...)
2046{
2047 int i, children;
2048 va_list ap;
2049 nodeptr n;
2050
2051 va_start(ap, bt);
2052 while (1) {
2053 n = va_arg(ap, nodeptr);
2054 if (!n)
2055 break;
2056 printf("%p [%d]:", n, n[bt->maxdegree*2+1].i);
2057 children = bt_subtrees(bt, n);
2058 for (i = 0; i < children; i++) {
2059 printf(" %p", bt_child(bt, n, i).p);
2060 if (i < children-1)
2061 printf(" %s", (char *)bt_element(bt, n, i));
2062 }
2063 printf("\n");
2064 }
2065 va_end(ap);
2066}
2067
2068/*
2069 * Verify a tree against an array. Checks that:
2070 *
2071 * - every node has a valid number of subtrees
2072 * - subtrees are either all present (internal node) or all absent
2073 * (leaf)
2074 * - elements are all present
2075 * - every leaf is at exactly the depth claimed by the tree
2076 * - the tree represents the correct list of elements in the
2077 * correct order. (This also tests the ordering constraint,
2078 * assuming the array is correctly constructed.)
2079 */
2080
2081void verifynode(btree *bt, nodeptr n, bt_element_t *array, int *arraypos,
2082 int depth)
2083{
2084 int subtrees, min, max, i, before, after, count;
2085
2086 /* Check the subtree count. The root can have as few as 2 subtrees. */
2087 subtrees = bt_subtrees(bt, n);
2088 max = bt_max_subtrees(bt);
2089 min = (depth == 1) ? 2 : bt_min_subtrees(bt);
2090 if (subtrees > max)
2091 error("node %p has too many subtrees (%d > %d)", n, subtrees, max);
2092 if (subtrees < min)
2093 error("node %p has too few subtrees (%d < %d)", n, subtrees, min);
2094
2095 /* Check that subtrees are present or absent as required. */
2096 for (i = 0; i < subtrees; i++) {
2097 node_addr child = bt_child(bt, n, i);
2098 if (depth == bt->depth && child.p != NULL)
2099 error("leaf node %p child %d is %p not NULL\n", n, i, child);
2100 if (depth != bt->depth && child.p == NULL)
2101 error("non-leaf node %p child %d is NULL\n", n, i);
2102 }
2103
2104 /* Check that elements are all present. */
2105 for (i = 0; i < subtrees-1; i++) {
2106 bt_element_t elt = bt_element(bt, n, i);
2107 if (elt == NULL)
2108 error("node %p element %d is NULL\n", n, i);
2109 }
2110
2111 before = *arraypos;
2112
2113 /* Now verify the subtrees, and simultaneously check the ordering. */
2114 for (i = 0; i < subtrees; i++) {
2115 if (depth < bt->depth) {
2116 nodeptr child = bt_read_lock_child(bt, n, i);
2117 verifynode(bt, child, array, arraypos, depth+1);
2118 bt_read_unlock(bt, child);
2119 }
2120 if (i < subtrees-1) {
2121 bt_element_t elt = bt_element(bt, n, i);
2122 if (array[*arraypos] != elt) {
2123 error("node %p element %d is \"%s\", but array[%d]=\"%s\"",
2124 n, i, elt, *arraypos, array[*arraypos]);
2125 }
2126 (*arraypos)++;
2127 }
2128 }
2129
2130 after = *arraypos;
2131
2132 /* Check the node count. */
2133 count = bt_node_count(bt, n);
2134 if (count != after - before)
2135 error("node %p count is %d, should be %d", n, count, after - before);
2136
2137 /*
2138 * Check the user properties.
2139 */
2140 {
2141 nodecomponent *prop;
2142 int i;
2143 int max = 0, total = 0;
2144
2145 prop = n + bt->maxdegree * 2 + 2;
2146
2147 for (i = before; i < after; i++) {
2148 int c = (unsigned char)*(char *)array[i];
2149
2150 if (max < c) max = c;
2151 total += c;
2152 }
2153
2154 if (prop[0].i != total)
2155 error("node %p total prop is %d, should be %d", n,
2156 prop[0].i, total);
2157 if (prop[1].i != max)
2158 error("node %p max prop is %d, should be %d", n,
2159 prop[1].i, max);
2160 }
2161}
2162
2163void verifytree(btree *bt, bt_element_t *array, int arraylen)
2164{
2165 nodeptr n;
2166 int i = 0;
2167 n = bt_read_lock_root(bt);
2168 if (n) {
2169 verifynode(bt, n, array, &i, 1);
2170 bt_read_unlock(bt, n);
2171 } else {
2172 if (bt->depth != 0) {
2173 error("tree has null root but depth is %d not zero", bt->depth);
2174 }
2175 }
2176 if (i != arraylen)
2177 error("tree contains %d elements, array contains %d",
2178 i, arraylen);
2179 testlock(-1, 0, NULL);
2180}
2181
2182int mycmp(void *state, void *av, void *bv) {
2183 char const *a = (char const *)av;
2184 char const *b = (char const *)bv;
2185 return strcmp(a, b);
2186}
2187
2188static void set_invalid_property(void *propv)
2189{
2190 int *prop = (int *)propv;
2191 prop[0] = prop[1] = -1;
2192}
2193
2194void mypropmake(void *state, void *av, void *destv)
2195{
2196 char const *a = (char const *)av;
2197 int *dest = (int *)destv;
2198 dest[0] = dest[1] = (unsigned char)*a;
2199}
2200
2201void mypropmerge(void *state, void *s1v, void *s2v, void *destv)
2202{
2203 int *s1 = (int *)s1v;
2204 int *s2 = (int *)s2v;
2205 int *dest = (int *)destv;
2206 if (!s1v && !s2v) {
2207 /* Special `destroy' case. */
2208 set_invalid_property(destv);
2209 return;
2210 }
2211 assert(s2[0] >= 0 && s2[1] >= 0);
2212 assert(s1 == NULL || (s1[0] >= 0 && s1[1] >= 0));
2213 dest[0] = s2[0] + (s1 ? s1[0] : 0);
2214 dest[1] = (s1 && s1[1] > s2[1] ? s1[1] : s2[1]);
2215}
2216
2217void array_addpos(bt_element_t *array, int *arraylen, bt_element_t e, int i)
2218{
2219 bt_element_t e2;
2220 int len = *arraylen;
2221
2222 assert(len < MAXTREESIZE);
2223
2224 while (i < len) {
2225 e2 = array[i];
2226 array[i] = e;
2227 e = e2;
2228 i++;
2229 }
2230 array[len] = e;
2231 *arraylen = len+1;
2232}
2233
2234void array_add(bt_element_t *array, int *arraylen, bt_element_t e)
2235{
2236 int i;
2237 int len = *arraylen;
2238
2239 for (i = 0; i < len; i++)
2240 if (mycmp(NULL, array[i], e) >= 0)
2241 break;
2242 assert(i == len || mycmp(NULL, array[i], e) != 0);
2243 array_addpos(array, arraylen, e, i);
2244}
2245
2246void array_delpos(bt_element_t *array, int *arraylen, int i)
2247{
2248 int len = *arraylen;
2249
2250 while (i < len-1) {
2251 array[i] = array[i+1];
2252 i++;
2253 }
2254 *arraylen = len-1;
2255}
2256
2257bt_element_t array_del(bt_element_t *array, int *arraylen, bt_element_t e)
2258{
2259 int i;
2260 int len = *arraylen;
2261 bt_element_t ret;
2262
2263 for (i = 0; i < len; i++)
2264 if (mycmp(NULL, array[i], e) >= 0)
2265 break;
2266 if (i < len && mycmp(NULL, array[i], e) == 0) {
2267 ret = array[i];
2268 array_delpos(array, arraylen, i);
2269 } else
2270 ret = NULL;
2271 return ret;
2272}
2273
2274/* A sample data set and test utility. Designed for pseudo-randomness,
2275 * and yet repeatability. */
2276
2277/*
2278 * This random number generator uses the `portable implementation'
2279 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
2280 * change it if not.
2281 */
2282int randomnumber(unsigned *seed) {
2283 *seed *= 1103515245;
2284 *seed += 12345;
2285 return ((*seed) / 65536) % 32768;
2286}
2287
2288#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
2289
2290char *strings[] = {
2291 "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
2292 "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
2293 "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
2294 "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
2295 "m", "s", "l", "4",
2296};
2297
2298#define NSTR lenof(strings)
2299
2300void findtest(btree *tree, bt_element_t *array, int arraylen)
2301{
2302 static const int rels[] = {
2303 BT_REL_EQ, BT_REL_GE, BT_REL_LE, BT_REL_LT, BT_REL_GT
2304 };
2305 static const char *const relnames[] = {
2306 "EQ", "GE", "LE", "LT", "GT"
2307 };
2308 int i, j, rel, index;
2309 char *p, *ret, *realret, *realret2;
2310 int lo, hi, mid, c;
2311
2312 for (i = 0; i < (int)NSTR; i++) {
2313 p = strings[i];
2314 for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
2315 rel = rels[j];
2316
2317 lo = 0; hi = arraylen-1;
2318 while (lo <= hi) {
2319 mid = (lo + hi) / 2;
2320 c = strcmp(p, array[mid]);
2321 if (c < 0)
2322 hi = mid-1;
2323 else if (c > 0)
2324 lo = mid+1;
2325 else
2326 break;
2327 }
2328
2329 if (c == 0) {
2330 if (rel == BT_REL_LT)
2331 ret = (mid > 0 ? array[--mid] : NULL);
2332 else if (rel == BT_REL_GT)
2333 ret = (mid < arraylen-1 ? array[++mid] : NULL);
2334 else
2335 ret = array[mid];
2336 } else {
2337 assert(lo == hi+1);
2338 if (rel == BT_REL_LT || rel == BT_REL_LE) {
2339 mid = hi;
2340 ret = (hi >= 0 ? array[hi] : NULL);
2341 } else if (rel == BT_REL_GT || rel == BT_REL_GE) {
2342 mid = lo;
2343 ret = (lo < arraylen ? array[lo] : NULL);
2344 } else
2345 ret = NULL;
2346 }
2347
2348 realret = bt_findrelpos(tree, p, NULL, rel, &index);
2349 testlock(-1, 0, NULL);
2350 if (realret != ret) {
2351 error("find(\"%s\",%s) gave %s should be %s",
2352 p, relnames[j], realret, ret);
2353 }
2354 if (realret && index != mid) {
2355 error("find(\"%s\",%s) gave %d should be %d",
2356 p, relnames[j], index, mid);
2357 }
2358 if (realret && rel == BT_REL_EQ) {
2359 realret2 = bt_index(tree, index);
2360 if (realret2 != realret) {
2361 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
2362 p, relnames[j], realret, index, index, realret2);
2363 }
2364 }
2365 }
2366 }
2367
2368 realret = bt_findrelpos(tree, NULL, NULL, BT_REL_GT, &index);
2369 testlock(-1, 0, NULL);
2370 if (arraylen && (realret != array[0] || index != 0)) {
2371 error("find(NULL,GT) gave %s(%d) should be %s(0)",
2372 realret, index, array[0]);
2373 } else if (!arraylen && (realret != NULL)) {
2374 error("find(NULL,GT) gave %s(%d) should be NULL",
2375 realret, index);
2376 }
2377
2378 realret = bt_findrelpos(tree, NULL, NULL, BT_REL_LT, &index);
2379 testlock(-1, 0, NULL);
2380 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
2381 error("find(NULL,LT) gave %s(%d) should be %s(0)",
2382 realret, index, array[arraylen-1]);
2383 } else if (!arraylen && (realret != NULL)) {
2384 error("find(NULL,LT) gave %s(%d) should be NULL",
2385 realret, index);
2386 }
2387}
2388
2389void splittest(btree *tree, bt_element_t *array, int arraylen)
2390{
2391 int i;
2392 btree *tree3, *tree4;
2393 for (i = 0; i <= arraylen; i++) {
2394 printf("splittest: %d\n", i);
2395 tree3 = BT_COPY(tree);
2396 testlock(-1, 0, NULL);
2397 tree4 = bt_splitpos(tree3, i, 0);
2398 testlock(-1, 0, NULL);
2399 verifytree(tree3, array, i);
2400 verifytree(tree4, array+i, arraylen-i);
2401 bt_join(tree3, tree4);
2402 testlock(-1, 0, NULL);
2403 verifytree(tree4, NULL, 0);
2404 bt_free(tree4); /* left empty by join */
2405 testlock(-1, 0, NULL);
2406 verifytree(tree3, array, arraylen);
2407 bt_free(tree3);
2408 testlock(-1, 0, NULL);
2409 }
2410}
2411
2412/*
2413 * Called to track read and write locks on nodes.
2414 */
2415void testlock(int write, int set, nodeptr n)
2416{
2417 static nodeptr readlocks[MAXLOCKS], writelocks[MAXLOCKS];
2418 static int nreadlocks = 0, nwritelocks = 0;
2419
2420 int i, rp, wp;
2421
2422 if (write == -1) {
2423 /* Called after an operation to ensure all locks are unlocked. */
2424 if (nreadlocks != 0 || nwritelocks != 0)
2425 error("at least one left-behind lock exists!");
2426 return;
2427 }
2428
2429 /* Locking NULL does nothing. Unlocking it is an error. */
2430 if (n == NULL) {
2431 if (!set)
2432 error("attempting to %s-unlock NULL", write ? "write" : "read");
2433 return;
2434 }
2435
2436 assert(nreadlocks < MAXLOCKS && nwritelocks < MAXLOCKS);
2437
2438 /* First look for the node in both lock lists. */
2439 rp = wp = -1;
2440 for (i = 0; i < nreadlocks; i++)
2441 if (readlocks[i] == n)
2442 rp = i;
2443 for (i = 0; i < nwritelocks; i++)
2444 if (writelocks[i] == n)
2445 wp = i;
2446
2447 /* Now diverge based on what we're supposed to be up to. */
2448 if (set) {
2449 /* Setting a lock. Should not already be locked in either list. */
2450 if (rp != -1 || wp != -1) {
2451 error("attempt to %s-lock node %p, already %s-locked",
2452 (write ? "write" : "read"), n, (rp==-1 ? "write" : "read"));
2453 }
2454 if (write)
2455 writelocks[nwritelocks++] = n;
2456 else
2457 readlocks[nreadlocks++] = n;
2458 } else {
2459 /* Clearing a lock. Should exist in exactly the correct list. */
2460 if (write && rp != -1)
2461 error("attempt to write-unlock node %p which is read-locked", n);
2462 if (!write && wp != -1)
2463 error("attempt to read-unlock node %p which is write-locked", n);
2464 if (wp != -1) {
2465 nwritelocks--;
2466 for (i = wp; i < nwritelocks; i++)
2467 writelocks[i] = writelocks[i+1];
2468 }
2469 if (rp != -1) {
2470 nreadlocks--;
2471 for (i = rp; i < nreadlocks; i++)
2472 readlocks[i] = readlocks[i+1];
2473 }
2474 }
2475}
2476
2477int main(void) {
2478 int in[NSTR];
2479 int i, j, k;
2480 int tworoot, tmplen;
2481 unsigned seed = 0;
2482 bt_element_t *array;
2483 int arraylen;
2484 bt_element_t ret, ret2, item;
2485 btree *tree, *tree2, *tree3, *tree4;
2486
2487 setvbuf(stdout, NULL, _IOLBF, 0);
2488 setvbuf(stderr, NULL, _IOLBF, 0);
2489 errors = 0;
2490
2491 for (i = 0; i < (int)NSTR; i++) in[i] = 0;
2492 array = newn(bt_element_t, MAXTREESIZE);
2493 arraylen = 0;
2494 tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2495 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2496
2497 verifytree(tree, array, arraylen);
2498 for (i = 0; i < 10000; i++) {
2499 j = randomnumber(&seed);
2500 j %= NSTR;
2501 printf("trial: %d\n", i);
2502 if (in[j]) {
2503 printf("deleting %s (%d)\n", strings[j], j);
2504 ret2 = array_del(array, &arraylen, strings[j]);
2505 ret = bt_del(tree, strings[j]);
2506 testlock(-1, 0, NULL);
2507 assert((bt_element_t)strings[j] == ret && ret == ret2);
2508 verifytree(tree, array, arraylen);
2509 in[j] = 0;
2510 } else {
2511 printf("adding %s (%d)\n", strings[j], j);
2512 array_add(array, &arraylen, strings[j]);
2513 ret = bt_add(tree, strings[j]);
2514 testlock(-1, 0, NULL);
2515 assert(strings[j] == ret);
2516 verifytree(tree, array, arraylen);
2517 in[j] = 1;
2518 }
2519 /* disptree(tree); */
2520 findtest(tree, array, arraylen);
2521 }
2522
2523 while (arraylen > 0) {
2524 j = randomnumber(&seed);
2525 j %= arraylen;
2526 item = array[j];
2527 ret2 = array_del(array, &arraylen, item);
2528 ret = bt_del(tree, item);
2529 testlock(-1, 0, NULL);
2530 assert(ret2 == ret);
2531 verifytree(tree, array, arraylen);
2532 }
2533
2534 bt_free(tree);
2535 testlock(-1, 0, NULL);
2536
2537 /*
2538 * Now try an unsorted tree. We don't really need to test
2539 * delpos because we know del is based on it, so it's already
2540 * been tested in the above sorted-tree code; but for
2541 * completeness we'll use it to tear down our unsorted tree
2542 * once we've built it.
2543 */
2544 tree = bt_new(NULL, NULL, NULL, 2*sizeof(int), alignof(int),
2545 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2546 verifytree(tree, array, arraylen);
2547 for (i = 0; i < 1000; i++) {
2548 printf("trial: %d\n", i);
2549 j = randomnumber(&seed);
2550 j %= NSTR;
2551 k = randomnumber(&seed);
2552 k %= bt_count(tree)+1;
2553 testlock(-1, 0, NULL);
2554 printf("adding string %s at index %d\n", strings[j], k);
2555 array_addpos(array, &arraylen, strings[j], k);
2556 bt_addpos(tree, strings[j], k);
2557 testlock(-1, 0, NULL);
2558 verifytree(tree, array, arraylen);
2559 }
2560
2561 /*
2562 * While we have this tree in its full form, we'll take a copy
2563 * of it to use in split and join testing.
2564 */
2565 tree2 = BT_COPY(tree);
2566 testlock(-1, 0, NULL);
2567 verifytree(tree2, array, arraylen);/* check the copy is accurate */
2568 /*
2569 * Split tests. Split the tree at every possible point and
2570 * check the resulting subtrees.
2571 */
2572 tworoot = bt_tworoot(tree2); /* see if it has a 2-root */
2573 testlock(-1, 0, NULL);
2574 splittest(tree2, array, arraylen);
2575 /*
2576 * Now do the split test again, but on a tree that has a 2-root
2577 * (if the previous one didn't) or doesn't (if the previous one
2578 * did).
2579 */
2580 tmplen = arraylen;
2581 while (bt_tworoot(tree2) == tworoot) {
2582 bt_delpos(tree2, --tmplen);
2583 testlock(-1, 0, NULL);
2584 }
2585 printf("now trying splits on second tree\n");
2586 splittest(tree2, array, tmplen);
2587 bt_free(tree2);
2588 testlock(-1, 0, NULL);
2589
2590 /*
2591 * Back to the main testing of uncounted trees.
2592 */
2593 while (bt_count(tree) > 0) {
2594 printf("cleanup: tree size %d\n", bt_count(tree));
2595 j = randomnumber(&seed);
2596 j %= bt_count(tree);
2597 printf("deleting string %s from index %d\n", (char *)array[j], j);
2598 ret = bt_delpos(tree, j);
2599 testlock(-1, 0, NULL);
2600 assert((bt_element_t)array[j] == ret);
2601 array_delpos(array, &arraylen, j);
2602 verifytree(tree, array, arraylen);
2603 }
2604 bt_free(tree);
2605 testlock(-1, 0, NULL);
2606
2607 /*
2608 * Finally, do some testing on split/join on _sorted_ trees. At
2609 * the same time, we'll be testing split on very small trees.
2610 */
2611 tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2612 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2613 arraylen = 0;
2614 for (i = 0; i < 16; i++) {
2615 array_add(array, &arraylen, strings[i]);
2616 ret = bt_add(tree, strings[i]);
2617 testlock(-1, 0, NULL);
2618 assert(strings[i] == ret);
2619 verifytree(tree, array, arraylen);
2620 tree2 = BT_COPY(tree);
2621 splittest(tree2, array, arraylen);
2622 testlock(-1, 0, NULL);
2623 bt_free(tree2);
2624 testlock(-1, 0, NULL);
2625 }
2626 bt_free(tree);
2627 testlock(-1, 0, NULL);
2628
2629 /*
2630 * Test silly cases of join: join(emptytree, emptytree), and
2631 * also ensure join correctly spots when sorted trees fail the
2632 * ordering constraint.
2633 */
2634 tree = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2635 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2636 tree2 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2637 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2638 tree3 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2639 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2640 tree4 = bt_new(mycmp, NULL, NULL, 2*sizeof(int), alignof(int),
2641 mypropmake, mypropmerge, NULL, TEST_DEGREE);
2642 assert(mycmp(NULL, strings[0], strings[1]) < 0); /* just in case :-) */
2643 bt_add(tree2, strings[1]);
2644 testlock(-1, 0, NULL);
2645 bt_add(tree4, strings[0]);
2646 testlock(-1, 0, NULL);
2647 array[0] = strings[0];
2648 array[1] = strings[1];
2649 verifytree(tree, array, 0);
2650 verifytree(tree2, array+1, 1);
2651 verifytree(tree3, array, 0);
2652 verifytree(tree4, array, 1);
2653
2654 /*
2655 * So:
2656 * - join(tree,tree3) should leave both tree and tree3 unchanged.
2657 * - joinr(tree,tree2) should leave both tree and tree2 unchanged.
2658 * - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
2659 * - join(tree, tree2) should move the element from tree2 to tree.
2660 * - joinr(tree4, tree3) should move the element from tree4 to tree3.
2661 * - join(tree,tree3) should return NULL and leave both unchanged.
2662 * - join(tree3,tree) should work and create a bigger tree in tree3.
2663 */
2664 assert(tree == bt_join(tree, tree3));
2665 testlock(-1, 0, NULL);
2666 verifytree(tree, array, 0);
2667 verifytree(tree3, array, 0);
2668 assert(tree2 == bt_joinr(tree, tree2));
2669 testlock(-1, 0, NULL);
2670 verifytree(tree, array, 0);
2671 verifytree(tree2, array+1, 1);
2672 assert(tree4 == bt_join(tree4, tree3));
2673 testlock(-1, 0, NULL);
2674 verifytree(tree3, array, 0);
2675 verifytree(tree4, array, 1);
2676 assert(tree == bt_join(tree, tree2));
2677 testlock(-1, 0, NULL);
2678 verifytree(tree, array+1, 1);
2679 verifytree(tree2, array, 0);
2680 assert(tree3 == bt_joinr(tree4, tree3));
2681 testlock(-1, 0, NULL);
2682 verifytree(tree3, array, 1);
2683 verifytree(tree4, array, 0);
2684 assert(NULL == bt_join(tree, tree3));
2685 testlock(-1, 0, NULL);
2686 verifytree(tree, array+1, 1);
2687 verifytree(tree3, array, 1);
2688 assert(tree3 == bt_join(tree3, tree));
2689 testlock(-1, 0, NULL);
2690 verifytree(tree3, array, 2);
2691 verifytree(tree, array, 0);
2692
2693 bt_free(tree);
2694 testlock(-1, 0, NULL);
2695 bt_free(tree2);
2696 testlock(-1, 0, NULL);
2697 bt_free(tree3);
2698 testlock(-1, 0, NULL);
2699 bt_free(tree4);
2700 testlock(-1, 0, NULL);
2701
2702 sfree(array);
2703
2704 if (errors)
2705 fprintf(stderr, "%d errors!\n", errors);
2706 return (errors != 0 ? 1 : 0);
2707}
2708
2709#endif