db3d2b34081ca93449289aa7969ca6759c471da9
[sgt/agedu] / index.c
1 /*
2 * index.c: Implementation of index.h.
3 */
4
5 #include "agedu.h"
6 #include "trie.h"
7 #include "index.h"
8 #include "alloc.h"
9
10 #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
11
12 #define min(x,y) ((x)<(y) ? (x):(y))
13 #define max(x,y) ((x)>(y) ? (x):(y))
14
15 #define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) )
16
17 struct avlnode {
18 off_t children[2], element;
19 int maxdepth; /* maximum depth of this subtree */
20 unsigned long long totalsize;
21 };
22
23 /*
24 * Determine the maximum depth of an AVL tree containing a certain
25 * number of nodes.
26 */
27 static int index_maxdepth(int nodecount)
28 {
29 int b, c, maxdepth;
30
31 /*
32 * Model the tree growing at maximum imbalance. We do this by
33 * determining the number of nodes in the most unbalanced
34 * (i.e. smallest) tree of any given depth, and stopping when
35 * that's larger than nodecount.
36 */
37 maxdepth = 1;
38 b = 0;
39 c = 1;
40 while (b <= nodecount) {
41 int tmp;
42
43 tmp = 1 + b + c;
44 b = c;
45 c = tmp;
46 maxdepth++;
47 }
48
49 return maxdepth;
50 }
51
52 off_t index_initial_size(off_t currentsize, int nodecount)
53 {
54 currentsize += PADDING(currentsize, alignof(off_t));
55 currentsize += nodecount + sizeof(off_t);
56 currentsize += PADDING(currentsize, alignof(struct avlnode));
57
58 return currentsize;
59 }
60
61 /* ----------------------------------------------------------------------
62 * Functions to build the index.
63 */
64
65 struct indexbuild {
66 void *t;
67 int n, nnodes;
68 struct avlnode *nodes;
69 off_t *roots;
70 struct avlnode *currroot;
71 struct avlnode *firstmutable;
72 };
73
74 #define ELEMENT(t,offset) \
75 ((offset) ? (struct trie_file *)((char *)(t) + (offset)) : NULL)
76 #define NODE(t,offset) \
77 ((offset) ? (struct avlnode *)((char *)(t) + (offset)) : NULL)
78 #define OFFSET(t,node) \
79 ((node) ? (off_t)((const char *)node - (const char *)t) : 0)
80 #define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0)
81
82 indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount,
83 size_t *delta)
84 {
85 indexbuild *ib = snew(indexbuild);
86
87 ib->t = t;
88 startoff += PADDING(startoff, alignof(off_t));
89 ib->roots = (off_t *)((char *)t + startoff);
90 trie_set_index_offset(t, startoff);
91 startoff += nodecount * sizeof(off_t);
92 startoff += PADDING(startoff, alignof(struct avlnode));
93 ib->nodes = (struct avlnode *)((char *)t + startoff);
94 ib->nnodes = ib->n = 0;
95 ib->currroot = NULL;
96 ib->firstmutable = ib->nodes + ib->nnodes;
97
98 if (delta)
99 *delta = sizeof(struct avlnode) * (1 + index_maxdepth(nodecount));
100
101 return ib;
102 }
103
104 /*
105 * Return a mutable node, which is n or a copy of n if n is
106 * non-NULL.
107 */
108 static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n)
109 {
110 struct avlnode *newnode;
111
112 if (n && n >= ib->firstmutable)
113 return n; /* already mutable */
114
115 newnode = ib->nodes + ib->nnodes++;
116 if (n)
117 *newnode = *n; /* structure copy */
118 return newnode;
119 }
120
121 /*
122 * Fix the annotations in a tree node.
123 */
124 static void avl_fix(indexbuild *ib, struct avlnode *n)
125 {
126 /*
127 * Make sure the max depth field is right.
128 */
129 n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])),
130 MAXDEPTH(NODE(ib->t, n->children[1])));
131
132 n->totalsize =
133 (ELEMENT(ib->t, n->element)->size +
134 (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) +
135 (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0));
136 }
137
138 static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n,
139 off_t node)
140 {
141 struct trie_file *newfile;
142 struct trie_file *oldfile;
143 int subtree;
144 struct avlnode *nn;
145
146 /*
147 * Recursion bottoming out: if the subtree we're inserting
148 * into is null, just construct and return a fresh node.
149 */
150 if (!n) {
151 n = avl_makemutable(ib, NULL);
152 n->children[0] = n->children[1] = 0;
153 n->element = node;
154 avl_fix(ib, n);
155 return n;
156 }
157
158 /*
159 * Otherwise, we have to insert into an existing tree.
160 */
161
162 /*
163 * Determine which subtree to insert this node into. Ties
164 * aren't important, so we just break them any old way.
165 */
166 newfile = (struct trie_file *)((char *)ib->t + node);
167 oldfile = (struct trie_file *)((char *)ib->t + n->element);
168 if (newfile->atime > oldfile->atime)
169 subtree = 1;
170 else
171 subtree = 0;
172
173 /*
174 * Construct a copy of the node we're looking at.
175 */
176 n = avl_makemutable(ib, n);
177
178 /*
179 * Recursively insert into the next subtree down.
180 */
181 nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node);
182 n->children[subtree] = OFFSET(ib->t, nn);
183
184 /*
185 * Rebalance if necessary, to ensure that our node's children
186 * differ in maximum depth by at most one. Of course, the
187 * subtree we've just modified will be the deeper one if so.
188 */
189 if (MAXDEPTH(NODE(ib->t, n->children[subtree])) >
190 MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) {
191 struct avlnode *p, *q;
192
193 /*
194 * There are two possible cases, one of which requires a
195 * single tree rotation and the other requires two. It all
196 * depends on which subtree of the next node down (here p)
197 * is the taller. (It turns out that they can't both be
198 * the same height: any tree which has just increased in
199 * depth must have one subtree strictly taller than the
200 * other.)
201 */
202 p = NODE(ib->t, n->children[subtree]);
203 assert(p >= ib->firstmutable);
204 if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >=
205 MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) {
206 /*
207 * n p
208 * / \ / \
209 * [k] p -> n [k+1]
210 * / \ / \
211 * [k] [k+1] [k] [k]
212 */
213 n->children[subtree] = p->children[1-subtree];
214 p->children[1-subtree] = OFFSET(ib->t, n);
215 avl_fix(ib, n);
216 n = p;
217 } else {
218 q = NODE(ib->t, p->children[1-subtree]);
219 assert(q >= ib->firstmutable);
220 p->children[1-subtree] = OFFSET(ib->t, q);
221 /*
222 * n n q
223 * / \ / \ / \
224 * [k] p == [k] p -> n p
225 * / \ / \ / \ / \
226 * [k+1] [k] q [k] [k] \ / [k]
227 * / \ [k-1,k] [k-1,k]
228 * [k-1,k] [k-1,k]
229 */
230 n->children[subtree] = q->children[1-subtree];
231 p->children[1-subtree] = q->children[subtree];
232 q->children[1-subtree] = OFFSET(ib->t, n);
233 q->children[subtree] = OFFSET(ib->t, p);
234 avl_fix(ib, n);
235 avl_fix(ib, p);
236 n = q;
237 }
238 }
239
240 /*
241 * Fix up our maximum depth field.
242 */
243 avl_fix(ib, n);
244
245 /*
246 * Done.
247 */
248 return n;
249 }
250
251 void indexbuild_add(indexbuild *ib, const struct trie_file *tf)
252 {
253 off_t node = OFFSET(ib->t, tf);
254 ib->currroot = avl_insert(ib, ib->currroot, node);
255 ib->roots[ib->n++] = 0;
256 }
257
258 void indexbuild_tag(indexbuild *ib)
259 {
260 if (ib->n > 0)
261 ib->roots[ib->n - 1] = OFFSET(ib->t, ib->currroot);
262 ib->firstmutable = ib->nodes + ib->nnodes;
263 }
264
265 void indexbuild_rebase(indexbuild *ib, void *t)
266 {
267 ptrdiff_t diff = (unsigned char *)t - (unsigned char *)(ib->t);
268
269 ib->t = t;
270 ib->nodes = (struct avlnode *)((unsigned char *)ib->nodes + diff);
271 ib->roots = (off_t *)((unsigned char *)ib->roots + diff);
272 ib->currroot = (struct avlnode *)((unsigned char *)ib->currroot + diff);
273 ib->firstmutable = (struct avlnode *)((unsigned char *)ib->firstmutable + diff);
274 }
275
276 off_t indexbuild_realsize(indexbuild *ib)
277 {
278 return OFFSET(ib->t, (ib->nodes + ib->nnodes));
279 }
280
281 void indexbuild_free(indexbuild *ib)
282 {
283 assert(ib->n == trie_count(ib->t));
284 sfree(ib);
285 }
286
287 unsigned long long index_query(const void *t, int n, unsigned long long at)
288 {
289 const off_t *roots;
290 const struct avlnode *node;
291 unsigned long count;
292 unsigned long long ret;
293
294 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
295
296 if (n < 1)
297 return 0;
298 count = trie_count(t);
299 if (n > count)
300 n = count;
301
302 assert(roots[n-1]);
303 node = NODE(t, roots[n-1]);
304
305 ret = 0;
306
307 while (node) {
308 const struct trie_file *tf = ELEMENT(t, node->element);
309 const struct avlnode *left = NODE(t, node->children[0]);
310 const struct avlnode *right = NODE(t, node->children[1]);
311
312 if (at <= tf->atime) {
313 node = left;
314 } else {
315 if (left)
316 ret += left->totalsize;
317 ret += tf->size;
318 node = right;
319 }
320 }
321
322 return ret;
323 }
324
325 unsigned long long index_order_stat(const void *t, double f)
326 {
327 const off_t *roots;
328 const struct avlnode *node;
329 unsigned long count;
330 unsigned long long size;
331
332 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
333 count = trie_count(t);
334 assert(roots[count-1]);
335 node = NODE(t, roots[count-1]);
336
337 size = node->totalsize * f;
338 assert(size <= node->totalsize);
339
340 while (1) {
341 const struct trie_file *tf = ELEMENT(t, node->element);
342 const struct avlnode *left = NODE(t, node->children[0]);
343 const struct avlnode *right = NODE(t, node->children[1]);
344
345 if (left && size < left->totalsize) {
346 node = left;
347 } else if (!right ||
348 size < (left ? left->totalsize : 0) + tf->size) {
349 return tf->atime;
350 } else {
351 if (left)
352 size -= left->totalsize;
353 size -= tf->size;
354 node = right;
355 }
356 }
357 }