Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / index.c
CommitLineData
70322ae3 1/*
2 * index.c: Implementation of index.h.
3 */
4
50e82fdc 5#include "agedu.h"
70322ae3 6#include "trie.h"
7#include "index.h"
995db599 8#include "alloc.h"
70322ae3 9
10#define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
11
70322ae3 12#define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) )
13
14struct avlnode {
15 off_t children[2], element;
16 int maxdepth; /* maximum depth of this subtree */
17 unsigned long long totalsize;
18};
19
522edd92 20/*
21 * Determine the maximum depth of an AVL tree containing a certain
22 * number of nodes.
23 */
24static int index_maxdepth(int nodecount)
70322ae3 25{
522edd92 26 int b, c, maxdepth;
70322ae3 27
28 /*
29 * Model the tree growing at maximum imbalance. We do this by
30 * determining the number of nodes in the most unbalanced
31 * (i.e. smallest) tree of any given depth, and stopping when
32 * that's larger than nodecount.
33 */
70322ae3 34 maxdepth = 1;
35 b = 0;
36 c = 1;
37 while (b <= nodecount) {
38 int tmp;
39
70322ae3 40 tmp = 1 + b + c;
41 b = c;
42 c = tmp;
43 maxdepth++;
44 }
45
522edd92 46 return maxdepth;
70322ae3 47}
48
522edd92 49off_t index_initial_size(off_t currentsize, int nodecount)
70322ae3 50{
51 currentsize += PADDING(currentsize, alignof(off_t));
acaf12ce 52 currentsize += nodecount * sizeof(off_t);
70322ae3 53 currentsize += PADDING(currentsize, alignof(struct avlnode));
70322ae3 54
55 return currentsize;
56}
57
58/* ----------------------------------------------------------------------
59 * Functions to build the index.
60 */
61
62struct indexbuild {
63 void *t;
522edd92 64 int n, nnodes;
70322ae3 65 struct avlnode *nodes;
66 off_t *roots;
67 struct avlnode *currroot;
68 struct avlnode *firstmutable;
69};
70
71#define ELEMENT(t,offset) \
a8d1009f 72 ((struct trie_file *) ((offset) ? ((char *)(t) + (offset)) : NULL))
70322ae3 73#define NODE(t,offset) \
a8d1009f 74 ((struct avlnode *) ((offset) ? ((char *)(t) + (offset)) : NULL))
70322ae3 75#define OFFSET(t,node) \
a8d1009f 76 ((node) ? (off_t)((const char *)node - (const char *)t) : (off_t)0)
70322ae3 77#define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0)
78
522edd92 79indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount,
80 size_t *delta)
70322ae3 81{
82 indexbuild *ib = snew(indexbuild);
83
84 ib->t = t;
85 startoff += PADDING(startoff, alignof(off_t));
86 ib->roots = (off_t *)((char *)t + startoff);
87 trie_set_index_offset(t, startoff);
88 startoff += nodecount * sizeof(off_t);
89 startoff += PADDING(startoff, alignof(struct avlnode));
90 ib->nodes = (struct avlnode *)((char *)t + startoff);
70322ae3 91 ib->nnodes = ib->n = 0;
92 ib->currroot = NULL;
93 ib->firstmutable = ib->nodes + ib->nnodes;
94
522edd92 95 if (delta)
96 *delta = sizeof(struct avlnode) * (1 + index_maxdepth(nodecount));
97
70322ae3 98 return ib;
99}
100
101/*
102 * Return a mutable node, which is n or a copy of n if n is
103 * non-NULL.
104 */
105static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n)
106{
107 struct avlnode *newnode;
108
109 if (n && n >= ib->firstmutable)
110 return n; /* already mutable */
111
70322ae3 112 newnode = ib->nodes + ib->nnodes++;
113 if (n)
114 *newnode = *n; /* structure copy */
115 return newnode;
116}
117
118/*
119 * Fix the annotations in a tree node.
120 */
121static void avl_fix(indexbuild *ib, struct avlnode *n)
122{
123 /*
124 * Make sure the max depth field is right.
125 */
126 n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])),
127 MAXDEPTH(NODE(ib->t, n->children[1])));
128
129 n->totalsize =
84849cbd 130 (ELEMENT(ib->t, n->element)->size +
70322ae3 131 (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) +
132 (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0));
133}
134
135static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n,
136 off_t node)
137{
138 struct trie_file *newfile;
139 struct trie_file *oldfile;
140 int subtree;
141 struct avlnode *nn;
142
143 /*
144 * Recursion bottoming out: if the subtree we're inserting
145 * into is null, just construct and return a fresh node.
146 */
147 if (!n) {
148 n = avl_makemutable(ib, NULL);
149 n->children[0] = n->children[1] = 0;
150 n->element = node;
151 avl_fix(ib, n);
152 return n;
153 }
154
155 /*
156 * Otherwise, we have to insert into an existing tree.
157 */
158
159 /*
160 * Determine which subtree to insert this node into. Ties
161 * aren't important, so we just break them any old way.
162 */
163 newfile = (struct trie_file *)((char *)ib->t + node);
164 oldfile = (struct trie_file *)((char *)ib->t + n->element);
165 if (newfile->atime > oldfile->atime)
166 subtree = 1;
167 else
168 subtree = 0;
169
170 /*
171 * Construct a copy of the node we're looking at.
172 */
173 n = avl_makemutable(ib, n);
174
175 /*
176 * Recursively insert into the next subtree down.
177 */
178 nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node);
179 n->children[subtree] = OFFSET(ib->t, nn);
180
181 /*
182 * Rebalance if necessary, to ensure that our node's children
183 * differ in maximum depth by at most one. Of course, the
184 * subtree we've just modified will be the deeper one if so.
185 */
186 if (MAXDEPTH(NODE(ib->t, n->children[subtree])) >
187 MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) {
188 struct avlnode *p, *q;
189
190 /*
191 * There are two possible cases, one of which requires a
192 * single tree rotation and the other requires two. It all
193 * depends on which subtree of the next node down (here p)
194 * is the taller. (It turns out that they can't both be
195 * the same height: any tree which has just increased in
196 * depth must have one subtree strictly taller than the
197 * other.)
198 */
199 p = NODE(ib->t, n->children[subtree]);
200 assert(p >= ib->firstmutable);
201 if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >=
202 MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) {
203 /*
204 * n p
205 * / \ / \
206 * [k] p -> n [k+1]
207 * / \ / \
208 * [k] [k+1] [k] [k]
209 */
210 n->children[subtree] = p->children[1-subtree];
211 p->children[1-subtree] = OFFSET(ib->t, n);
212 avl_fix(ib, n);
213 n = p;
214 } else {
215 q = NODE(ib->t, p->children[1-subtree]);
216 assert(q >= ib->firstmutable);
217 p->children[1-subtree] = OFFSET(ib->t, q);
218 /*
219 * n n q
220 * / \ / \ / \
221 * [k] p == [k] p -> n p
222 * / \ / \ / \ / \
223 * [k+1] [k] q [k] [k] \ / [k]
224 * / \ [k-1,k] [k-1,k]
225 * [k-1,k] [k-1,k]
226 */
227 n->children[subtree] = q->children[1-subtree];
228 p->children[1-subtree] = q->children[subtree];
229 q->children[1-subtree] = OFFSET(ib->t, n);
230 q->children[subtree] = OFFSET(ib->t, p);
231 avl_fix(ib, n);
232 avl_fix(ib, p);
233 n = q;
234 }
235 }
236
237 /*
238 * Fix up our maximum depth field.
239 */
240 avl_fix(ib, n);
241
242 /*
243 * Done.
244 */
245 return n;
246}
247
248void indexbuild_add(indexbuild *ib, const struct trie_file *tf)
249{
250 off_t node = OFFSET(ib->t, tf);
251 ib->currroot = avl_insert(ib, ib->currroot, node);
14601b5d 252 ib->roots[ib->n++] = 0;
253}
254
255void indexbuild_tag(indexbuild *ib)
256{
257 if (ib->n > 0)
258 ib->roots[ib->n - 1] = OFFSET(ib->t, ib->currroot);
70322ae3 259 ib->firstmutable = ib->nodes + ib->nnodes;
260}
261
522edd92 262void indexbuild_rebase(indexbuild *ib, void *t)
263{
264 ptrdiff_t diff = (unsigned char *)t - (unsigned char *)(ib->t);
265
266 ib->t = t;
267 ib->nodes = (struct avlnode *)((unsigned char *)ib->nodes + diff);
268 ib->roots = (off_t *)((unsigned char *)ib->roots + diff);
38d8173d 269 if (ib->currroot)
270 ib->currroot = (struct avlnode *)
271 ((unsigned char *)ib->currroot + diff);
522edd92 272 ib->firstmutable = (struct avlnode *)((unsigned char *)ib->firstmutable + diff);
273}
274
70322ae3 275off_t indexbuild_realsize(indexbuild *ib)
276{
277 return OFFSET(ib->t, (ib->nodes + ib->nnodes));
278}
279
280void indexbuild_free(indexbuild *ib)
281{
14601b5d 282 assert(ib->n == trie_count(ib->t));
70322ae3 283 sfree(ib);
284}
285
92d3b326 286int index_has_root(const void *t, int n)
287{
288 const off_t *roots;
289
290 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
291
292 if (n == 0)
293 return 1;
294 if (n < 0 || n >= trie_count(t) || !roots[n-1])
295 return 0;
296 return 1;
297}
298
70322ae3 299unsigned long long index_query(const void *t, int n, unsigned long long at)
300{
301 const off_t *roots;
302 const struct avlnode *node;
303 unsigned long count;
304 unsigned long long ret;
305
306 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
307
308 if (n < 1)
309 return 0;
310 count = trie_count(t);
311 if (n > count)
312 n = count;
313
14601b5d 314 assert(roots[n-1]);
70322ae3 315 node = NODE(t, roots[n-1]);
316
317 ret = 0;
318
319 while (node) {
320 const struct trie_file *tf = ELEMENT(t, node->element);
321 const struct avlnode *left = NODE(t, node->children[0]);
322 const struct avlnode *right = NODE(t, node->children[1]);
323
324 if (at <= tf->atime) {
325 node = left;
326 } else {
327 if (left)
328 ret += left->totalsize;
84849cbd 329 ret += tf->size;
70322ae3 330 node = right;
331 }
332 }
333
334 return ret;
335}
336
337unsigned long long index_order_stat(const void *t, double f)
338{
339 const off_t *roots;
340 const struct avlnode *node;
341 unsigned long count;
342 unsigned long long size;
343
344 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
345 count = trie_count(t);
14601b5d 346 assert(roots[count-1]);
70322ae3 347 node = NODE(t, roots[count-1]);
348
349 size = node->totalsize * f;
350 assert(size <= node->totalsize);
351
352 while (1) {
353 const struct trie_file *tf = ELEMENT(t, node->element);
354 const struct avlnode *left = NODE(t, node->children[0]);
355 const struct avlnode *right = NODE(t, node->children[1]);
356
357 if (left && size < left->totalsize) {
358 node = left;
359 } else if (!right ||
84849cbd 360 size < (left ? left->totalsize : 0) + tf->size) {
70322ae3 361 return tf->atime;
362 } else {
363 if (left)
364 size -= left->totalsize;
84849cbd 365 size -= tf->size;
70322ae3 366 node = right;
367 }
368 }
369}