70322ae3 |
1 | /* |
2 | * index.c: Implementation of index.h. |
3 | */ |
4 | |
50e82fdc |
5 | #include "agedu.h" |
70322ae3 |
6 | #include "trie.h" |
7 | #include "index.h" |
995db599 |
8 | #include "alloc.h" |
70322ae3 |
9 | |
10 | #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) ) |
11 | |
70322ae3 |
12 | #define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) ) |
13 | |
14 | struct avlnode { |
15 | off_t children[2], element; |
16 | int maxdepth; /* maximum depth of this subtree */ |
17 | unsigned long long totalsize; |
18 | }; |
19 | |
522edd92 |
20 | /* |
21 | * Determine the maximum depth of an AVL tree containing a certain |
22 | * number of nodes. |
23 | */ |
24 | static int index_maxdepth(int nodecount) |
70322ae3 |
25 | { |
522edd92 |
26 | int b, c, maxdepth; |
70322ae3 |
27 | |
28 | /* |
29 | * Model the tree growing at maximum imbalance. We do this by |
30 | * determining the number of nodes in the most unbalanced |
31 | * (i.e. smallest) tree of any given depth, and stopping when |
32 | * that's larger than nodecount. |
33 | */ |
70322ae3 |
34 | maxdepth = 1; |
35 | b = 0; |
36 | c = 1; |
37 | while (b <= nodecount) { |
38 | int tmp; |
39 | |
70322ae3 |
40 | tmp = 1 + b + c; |
41 | b = c; |
42 | c = tmp; |
43 | maxdepth++; |
44 | } |
45 | |
522edd92 |
46 | return maxdepth; |
70322ae3 |
47 | } |
48 | |
522edd92 |
49 | off_t index_initial_size(off_t currentsize, int nodecount) |
70322ae3 |
50 | { |
51 | currentsize += PADDING(currentsize, alignof(off_t)); |
acaf12ce |
52 | currentsize += nodecount * sizeof(off_t); |
70322ae3 |
53 | currentsize += PADDING(currentsize, alignof(struct avlnode)); |
70322ae3 |
54 | |
55 | return currentsize; |
56 | } |
57 | |
58 | /* ---------------------------------------------------------------------- |
59 | * Functions to build the index. |
60 | */ |
61 | |
62 | struct indexbuild { |
63 | void *t; |
522edd92 |
64 | int n, nnodes; |
70322ae3 |
65 | struct avlnode *nodes; |
66 | off_t *roots; |
67 | struct avlnode *currroot; |
68 | struct avlnode *firstmutable; |
69 | }; |
70 | |
71 | #define ELEMENT(t,offset) \ |
72 | ((offset) ? (struct trie_file *)((char *)(t) + (offset)) : NULL) |
73 | #define NODE(t,offset) \ |
74 | ((offset) ? (struct avlnode *)((char *)(t) + (offset)) : NULL) |
75 | #define OFFSET(t,node) \ |
76 | ((node) ? (off_t)((const char *)node - (const char *)t) : 0) |
77 | #define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0) |
78 | |
522edd92 |
79 | indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount, |
80 | size_t *delta) |
70322ae3 |
81 | { |
82 | indexbuild *ib = snew(indexbuild); |
83 | |
84 | ib->t = t; |
85 | startoff += PADDING(startoff, alignof(off_t)); |
86 | ib->roots = (off_t *)((char *)t + startoff); |
87 | trie_set_index_offset(t, startoff); |
88 | startoff += nodecount * sizeof(off_t); |
89 | startoff += PADDING(startoff, alignof(struct avlnode)); |
90 | ib->nodes = (struct avlnode *)((char *)t + startoff); |
70322ae3 |
91 | ib->nnodes = ib->n = 0; |
92 | ib->currroot = NULL; |
93 | ib->firstmutable = ib->nodes + ib->nnodes; |
94 | |
522edd92 |
95 | if (delta) |
96 | *delta = sizeof(struct avlnode) * (1 + index_maxdepth(nodecount)); |
97 | |
70322ae3 |
98 | return ib; |
99 | } |
100 | |
101 | /* |
102 | * Return a mutable node, which is n or a copy of n if n is |
103 | * non-NULL. |
104 | */ |
105 | static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n) |
106 | { |
107 | struct avlnode *newnode; |
108 | |
109 | if (n && n >= ib->firstmutable) |
110 | return n; /* already mutable */ |
111 | |
70322ae3 |
112 | newnode = ib->nodes + ib->nnodes++; |
113 | if (n) |
114 | *newnode = *n; /* structure copy */ |
115 | return newnode; |
116 | } |
117 | |
118 | /* |
119 | * Fix the annotations in a tree node. |
120 | */ |
121 | static void avl_fix(indexbuild *ib, struct avlnode *n) |
122 | { |
123 | /* |
124 | * Make sure the max depth field is right. |
125 | */ |
126 | n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])), |
127 | MAXDEPTH(NODE(ib->t, n->children[1]))); |
128 | |
129 | n->totalsize = |
84849cbd |
130 | (ELEMENT(ib->t, n->element)->size + |
70322ae3 |
131 | (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) + |
132 | (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0)); |
133 | } |
134 | |
135 | static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n, |
136 | off_t node) |
137 | { |
138 | struct trie_file *newfile; |
139 | struct trie_file *oldfile; |
140 | int subtree; |
141 | struct avlnode *nn; |
142 | |
143 | /* |
144 | * Recursion bottoming out: if the subtree we're inserting |
145 | * into is null, just construct and return a fresh node. |
146 | */ |
147 | if (!n) { |
148 | n = avl_makemutable(ib, NULL); |
149 | n->children[0] = n->children[1] = 0; |
150 | n->element = node; |
151 | avl_fix(ib, n); |
152 | return n; |
153 | } |
154 | |
155 | /* |
156 | * Otherwise, we have to insert into an existing tree. |
157 | */ |
158 | |
159 | /* |
160 | * Determine which subtree to insert this node into. Ties |
161 | * aren't important, so we just break them any old way. |
162 | */ |
163 | newfile = (struct trie_file *)((char *)ib->t + node); |
164 | oldfile = (struct trie_file *)((char *)ib->t + n->element); |
165 | if (newfile->atime > oldfile->atime) |
166 | subtree = 1; |
167 | else |
168 | subtree = 0; |
169 | |
170 | /* |
171 | * Construct a copy of the node we're looking at. |
172 | */ |
173 | n = avl_makemutable(ib, n); |
174 | |
175 | /* |
176 | * Recursively insert into the next subtree down. |
177 | */ |
178 | nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node); |
179 | n->children[subtree] = OFFSET(ib->t, nn); |
180 | |
181 | /* |
182 | * Rebalance if necessary, to ensure that our node's children |
183 | * differ in maximum depth by at most one. Of course, the |
184 | * subtree we've just modified will be the deeper one if so. |
185 | */ |
186 | if (MAXDEPTH(NODE(ib->t, n->children[subtree])) > |
187 | MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) { |
188 | struct avlnode *p, *q; |
189 | |
190 | /* |
191 | * There are two possible cases, one of which requires a |
192 | * single tree rotation and the other requires two. It all |
193 | * depends on which subtree of the next node down (here p) |
194 | * is the taller. (It turns out that they can't both be |
195 | * the same height: any tree which has just increased in |
196 | * depth must have one subtree strictly taller than the |
197 | * other.) |
198 | */ |
199 | p = NODE(ib->t, n->children[subtree]); |
200 | assert(p >= ib->firstmutable); |
201 | if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >= |
202 | MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) { |
203 | /* |
204 | * n p |
205 | * / \ / \ |
206 | * [k] p -> n [k+1] |
207 | * / \ / \ |
208 | * [k] [k+1] [k] [k] |
209 | */ |
210 | n->children[subtree] = p->children[1-subtree]; |
211 | p->children[1-subtree] = OFFSET(ib->t, n); |
212 | avl_fix(ib, n); |
213 | n = p; |
214 | } else { |
215 | q = NODE(ib->t, p->children[1-subtree]); |
216 | assert(q >= ib->firstmutable); |
217 | p->children[1-subtree] = OFFSET(ib->t, q); |
218 | /* |
219 | * n n q |
220 | * / \ / \ / \ |
221 | * [k] p == [k] p -> n p |
222 | * / \ / \ / \ / \ |
223 | * [k+1] [k] q [k] [k] \ / [k] |
224 | * / \ [k-1,k] [k-1,k] |
225 | * [k-1,k] [k-1,k] |
226 | */ |
227 | n->children[subtree] = q->children[1-subtree]; |
228 | p->children[1-subtree] = q->children[subtree]; |
229 | q->children[1-subtree] = OFFSET(ib->t, n); |
230 | q->children[subtree] = OFFSET(ib->t, p); |
231 | avl_fix(ib, n); |
232 | avl_fix(ib, p); |
233 | n = q; |
234 | } |
235 | } |
236 | |
237 | /* |
238 | * Fix up our maximum depth field. |
239 | */ |
240 | avl_fix(ib, n); |
241 | |
242 | /* |
243 | * Done. |
244 | */ |
245 | return n; |
246 | } |
247 | |
248 | void indexbuild_add(indexbuild *ib, const struct trie_file *tf) |
249 | { |
250 | off_t node = OFFSET(ib->t, tf); |
251 | ib->currroot = avl_insert(ib, ib->currroot, node); |
14601b5d |
252 | ib->roots[ib->n++] = 0; |
253 | } |
254 | |
255 | void indexbuild_tag(indexbuild *ib) |
256 | { |
257 | if (ib->n > 0) |
258 | ib->roots[ib->n - 1] = OFFSET(ib->t, ib->currroot); |
70322ae3 |
259 | ib->firstmutable = ib->nodes + ib->nnodes; |
260 | } |
261 | |
522edd92 |
262 | void indexbuild_rebase(indexbuild *ib, void *t) |
263 | { |
264 | ptrdiff_t diff = (unsigned char *)t - (unsigned char *)(ib->t); |
265 | |
266 | ib->t = t; |
267 | ib->nodes = (struct avlnode *)((unsigned char *)ib->nodes + diff); |
268 | ib->roots = (off_t *)((unsigned char *)ib->roots + diff); |
38d8173d |
269 | if (ib->currroot) |
270 | ib->currroot = (struct avlnode *) |
271 | ((unsigned char *)ib->currroot + diff); |
522edd92 |
272 | ib->firstmutable = (struct avlnode *)((unsigned char *)ib->firstmutable + diff); |
273 | } |
274 | |
70322ae3 |
275 | off_t indexbuild_realsize(indexbuild *ib) |
276 | { |
277 | return OFFSET(ib->t, (ib->nodes + ib->nnodes)); |
278 | } |
279 | |
280 | void indexbuild_free(indexbuild *ib) |
281 | { |
14601b5d |
282 | assert(ib->n == trie_count(ib->t)); |
70322ae3 |
283 | sfree(ib); |
284 | } |
285 | |
286 | unsigned long long index_query(const void *t, int n, unsigned long long at) |
287 | { |
288 | const off_t *roots; |
289 | const struct avlnode *node; |
290 | unsigned long count; |
291 | unsigned long long ret; |
292 | |
293 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
294 | |
295 | if (n < 1) |
296 | return 0; |
297 | count = trie_count(t); |
298 | if (n > count) |
299 | n = count; |
300 | |
14601b5d |
301 | assert(roots[n-1]); |
70322ae3 |
302 | node = NODE(t, roots[n-1]); |
303 | |
304 | ret = 0; |
305 | |
306 | while (node) { |
307 | const struct trie_file *tf = ELEMENT(t, node->element); |
308 | const struct avlnode *left = NODE(t, node->children[0]); |
309 | const struct avlnode *right = NODE(t, node->children[1]); |
310 | |
311 | if (at <= tf->atime) { |
312 | node = left; |
313 | } else { |
314 | if (left) |
315 | ret += left->totalsize; |
84849cbd |
316 | ret += tf->size; |
70322ae3 |
317 | node = right; |
318 | } |
319 | } |
320 | |
321 | return ret; |
322 | } |
323 | |
324 | unsigned long long index_order_stat(const void *t, double f) |
325 | { |
326 | const off_t *roots; |
327 | const struct avlnode *node; |
328 | unsigned long count; |
329 | unsigned long long size; |
330 | |
331 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
332 | count = trie_count(t); |
14601b5d |
333 | assert(roots[count-1]); |
70322ae3 |
334 | node = NODE(t, roots[count-1]); |
335 | |
336 | size = node->totalsize * f; |
337 | assert(size <= node->totalsize); |
338 | |
339 | while (1) { |
340 | const struct trie_file *tf = ELEMENT(t, node->element); |
341 | const struct avlnode *left = NODE(t, node->children[0]); |
342 | const struct avlnode *right = NODE(t, node->children[1]); |
343 | |
344 | if (left && size < left->totalsize) { |
345 | node = left; |
346 | } else if (!right || |
84849cbd |
347 | size < (left ? left->totalsize : 0) + tf->size) { |
70322ae3 |
348 | return tf->atime; |
349 | } else { |
350 | if (left) |
351 | size -= left->totalsize; |
84849cbd |
352 | size -= tf->size; |
70322ae3 |
353 | node = right; |
354 | } |
355 | } |
356 | } |