70322ae3 |
1 | /* |
2 | * index.c: Implementation of index.h. |
3 | */ |
4 | |
5 | #include <assert.h> |
6 | #include <stdio.h> |
7 | #include <stddef.h> |
8 | |
9 | #include "trie.h" |
10 | #include "index.h" |
995db599 |
11 | #include "alloc.h" |
70322ae3 |
12 | |
13 | #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) ) |
14 | |
15 | #define min(x,y) ((x)<(y) ? (x):(y)) |
16 | #define max(x,y) ((x)>(y) ? (x):(y)) |
17 | |
18 | #define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) ) |
19 | |
20 | struct avlnode { |
21 | off_t children[2], element; |
22 | int maxdepth; /* maximum depth of this subtree */ |
23 | unsigned long long totalsize; |
24 | }; |
25 | |
26 | static int index_navlnodes(int nodecount) |
27 | { |
28 | int b, c, maxdepth, ret; |
29 | |
30 | /* |
31 | * Model the tree growing at maximum imbalance. We do this by |
32 | * determining the number of nodes in the most unbalanced |
33 | * (i.e. smallest) tree of any given depth, and stopping when |
34 | * that's larger than nodecount. |
35 | */ |
36 | ret = 0; |
37 | maxdepth = 1; |
38 | b = 0; |
39 | c = 1; |
40 | while (b <= nodecount) { |
41 | int tmp; |
42 | |
43 | /* |
44 | * A tree with at most b nodes can be at most depth |
45 | * maxdepth-1. A tree with more than b but at most c can |
46 | * be at most maxdepth. Therefore, for each tree size |
47 | * between b and c, we might need to adjust maxdepth+1 |
48 | * nodes. |
49 | */ |
50 | tmp = min(c, nodecount); |
51 | ret += (tmp - b) * maxdepth; |
52 | |
53 | tmp = 1 + b + c; |
54 | b = c; |
55 | c = tmp; |
56 | maxdepth++; |
57 | } |
58 | |
59 | return ret; |
60 | } |
61 | |
62 | off_t index_compute_size(off_t currentsize, int nodecount) |
63 | { |
64 | currentsize += PADDING(currentsize, alignof(off_t)); |
65 | currentsize += nodecount + sizeof(off_t); |
66 | currentsize += PADDING(currentsize, alignof(struct avlnode)); |
67 | currentsize += index_navlnodes(nodecount) * sizeof(struct avlnode); |
68 | |
69 | return currentsize; |
70 | } |
71 | |
72 | /* ---------------------------------------------------------------------- |
73 | * Functions to build the index. |
74 | */ |
75 | |
76 | struct indexbuild { |
77 | void *t; |
78 | int n, nnodes, maxnodes; |
79 | struct avlnode *nodes; |
80 | off_t *roots; |
81 | struct avlnode *currroot; |
82 | struct avlnode *firstmutable; |
83 | }; |
84 | |
85 | #define ELEMENT(t,offset) \ |
86 | ((offset) ? (struct trie_file *)((char *)(t) + (offset)) : NULL) |
87 | #define NODE(t,offset) \ |
88 | ((offset) ? (struct avlnode *)((char *)(t) + (offset)) : NULL) |
89 | #define OFFSET(t,node) \ |
90 | ((node) ? (off_t)((const char *)node - (const char *)t) : 0) |
91 | #define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0) |
92 | |
93 | indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount) |
94 | { |
95 | indexbuild *ib = snew(indexbuild); |
96 | |
97 | ib->t = t; |
98 | startoff += PADDING(startoff, alignof(off_t)); |
99 | ib->roots = (off_t *)((char *)t + startoff); |
100 | trie_set_index_offset(t, startoff); |
101 | startoff += nodecount * sizeof(off_t); |
102 | startoff += PADDING(startoff, alignof(struct avlnode)); |
103 | ib->nodes = (struct avlnode *)((char *)t + startoff); |
104 | ib->maxnodes = index_navlnodes(nodecount); |
105 | ib->nnodes = ib->n = 0; |
106 | ib->currroot = NULL; |
107 | ib->firstmutable = ib->nodes + ib->nnodes; |
108 | |
109 | return ib; |
110 | } |
111 | |
112 | /* |
113 | * Return a mutable node, which is n or a copy of n if n is |
114 | * non-NULL. |
115 | */ |
116 | static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n) |
117 | { |
118 | struct avlnode *newnode; |
119 | |
120 | if (n && n >= ib->firstmutable) |
121 | return n; /* already mutable */ |
122 | |
123 | assert(ib->nnodes < ib->maxnodes); |
124 | newnode = ib->nodes + ib->nnodes++; |
125 | if (n) |
126 | *newnode = *n; /* structure copy */ |
127 | return newnode; |
128 | } |
129 | |
130 | /* |
131 | * Fix the annotations in a tree node. |
132 | */ |
133 | static void avl_fix(indexbuild *ib, struct avlnode *n) |
134 | { |
135 | /* |
136 | * Make sure the max depth field is right. |
137 | */ |
138 | n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])), |
139 | MAXDEPTH(NODE(ib->t, n->children[1]))); |
140 | |
141 | n->totalsize = |
84849cbd |
142 | (ELEMENT(ib->t, n->element)->size + |
70322ae3 |
143 | (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) + |
144 | (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0)); |
145 | } |
146 | |
147 | static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n, |
148 | off_t node) |
149 | { |
150 | struct trie_file *newfile; |
151 | struct trie_file *oldfile; |
152 | int subtree; |
153 | struct avlnode *nn; |
154 | |
155 | /* |
156 | * Recursion bottoming out: if the subtree we're inserting |
157 | * into is null, just construct and return a fresh node. |
158 | */ |
159 | if (!n) { |
160 | n = avl_makemutable(ib, NULL); |
161 | n->children[0] = n->children[1] = 0; |
162 | n->element = node; |
163 | avl_fix(ib, n); |
164 | return n; |
165 | } |
166 | |
167 | /* |
168 | * Otherwise, we have to insert into an existing tree. |
169 | */ |
170 | |
171 | /* |
172 | * Determine which subtree to insert this node into. Ties |
173 | * aren't important, so we just break them any old way. |
174 | */ |
175 | newfile = (struct trie_file *)((char *)ib->t + node); |
176 | oldfile = (struct trie_file *)((char *)ib->t + n->element); |
177 | if (newfile->atime > oldfile->atime) |
178 | subtree = 1; |
179 | else |
180 | subtree = 0; |
181 | |
182 | /* |
183 | * Construct a copy of the node we're looking at. |
184 | */ |
185 | n = avl_makemutable(ib, n); |
186 | |
187 | /* |
188 | * Recursively insert into the next subtree down. |
189 | */ |
190 | nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node); |
191 | n->children[subtree] = OFFSET(ib->t, nn); |
192 | |
193 | /* |
194 | * Rebalance if necessary, to ensure that our node's children |
195 | * differ in maximum depth by at most one. Of course, the |
196 | * subtree we've just modified will be the deeper one if so. |
197 | */ |
198 | if (MAXDEPTH(NODE(ib->t, n->children[subtree])) > |
199 | MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) { |
200 | struct avlnode *p, *q; |
201 | |
202 | /* |
203 | * There are two possible cases, one of which requires a |
204 | * single tree rotation and the other requires two. It all |
205 | * depends on which subtree of the next node down (here p) |
206 | * is the taller. (It turns out that they can't both be |
207 | * the same height: any tree which has just increased in |
208 | * depth must have one subtree strictly taller than the |
209 | * other.) |
210 | */ |
211 | p = NODE(ib->t, n->children[subtree]); |
212 | assert(p >= ib->firstmutable); |
213 | if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >= |
214 | MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) { |
215 | /* |
216 | * n p |
217 | * / \ / \ |
218 | * [k] p -> n [k+1] |
219 | * / \ / \ |
220 | * [k] [k+1] [k] [k] |
221 | */ |
222 | n->children[subtree] = p->children[1-subtree]; |
223 | p->children[1-subtree] = OFFSET(ib->t, n); |
224 | avl_fix(ib, n); |
225 | n = p; |
226 | } else { |
227 | q = NODE(ib->t, p->children[1-subtree]); |
228 | assert(q >= ib->firstmutable); |
229 | p->children[1-subtree] = OFFSET(ib->t, q); |
230 | /* |
231 | * n n q |
232 | * / \ / \ / \ |
233 | * [k] p == [k] p -> n p |
234 | * / \ / \ / \ / \ |
235 | * [k+1] [k] q [k] [k] \ / [k] |
236 | * / \ [k-1,k] [k-1,k] |
237 | * [k-1,k] [k-1,k] |
238 | */ |
239 | n->children[subtree] = q->children[1-subtree]; |
240 | p->children[1-subtree] = q->children[subtree]; |
241 | q->children[1-subtree] = OFFSET(ib->t, n); |
242 | q->children[subtree] = OFFSET(ib->t, p); |
243 | avl_fix(ib, n); |
244 | avl_fix(ib, p); |
245 | n = q; |
246 | } |
247 | } |
248 | |
249 | /* |
250 | * Fix up our maximum depth field. |
251 | */ |
252 | avl_fix(ib, n); |
253 | |
254 | /* |
255 | * Done. |
256 | */ |
257 | return n; |
258 | } |
259 | |
260 | void indexbuild_add(indexbuild *ib, const struct trie_file *tf) |
261 | { |
262 | off_t node = OFFSET(ib->t, tf); |
263 | ib->currroot = avl_insert(ib, ib->currroot, node); |
264 | ib->roots[ib->n++] = OFFSET(ib->t, ib->currroot); |
265 | ib->firstmutable = ib->nodes + ib->nnodes; |
266 | } |
267 | |
268 | off_t indexbuild_realsize(indexbuild *ib) |
269 | { |
270 | return OFFSET(ib->t, (ib->nodes + ib->nnodes)); |
271 | } |
272 | |
273 | void indexbuild_free(indexbuild *ib) |
274 | { |
275 | sfree(ib); |
276 | } |
277 | |
278 | unsigned long long index_query(const void *t, int n, unsigned long long at) |
279 | { |
280 | const off_t *roots; |
281 | const struct avlnode *node; |
282 | unsigned long count; |
283 | unsigned long long ret; |
284 | |
285 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
286 | |
287 | if (n < 1) |
288 | return 0; |
289 | count = trie_count(t); |
290 | if (n > count) |
291 | n = count; |
292 | |
293 | node = NODE(t, roots[n-1]); |
294 | |
295 | ret = 0; |
296 | |
297 | while (node) { |
298 | const struct trie_file *tf = ELEMENT(t, node->element); |
299 | const struct avlnode *left = NODE(t, node->children[0]); |
300 | const struct avlnode *right = NODE(t, node->children[1]); |
301 | |
302 | if (at <= tf->atime) { |
303 | node = left; |
304 | } else { |
305 | if (left) |
306 | ret += left->totalsize; |
84849cbd |
307 | ret += tf->size; |
70322ae3 |
308 | node = right; |
309 | } |
310 | } |
311 | |
312 | return ret; |
313 | } |
314 | |
315 | unsigned long long index_order_stat(const void *t, double f) |
316 | { |
317 | const off_t *roots; |
318 | const struct avlnode *node; |
319 | unsigned long count; |
320 | unsigned long long size; |
321 | |
322 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
323 | count = trie_count(t); |
324 | node = NODE(t, roots[count-1]); |
325 | |
326 | size = node->totalsize * f; |
327 | assert(size <= node->totalsize); |
328 | |
329 | while (1) { |
330 | const struct trie_file *tf = ELEMENT(t, node->element); |
331 | const struct avlnode *left = NODE(t, node->children[0]); |
332 | const struct avlnode *right = NODE(t, node->children[1]); |
333 | |
334 | if (left && size < left->totalsize) { |
335 | node = left; |
336 | } else if (!right || |
84849cbd |
337 | size < (left ? left->totalsize : 0) + tf->size) { |
70322ae3 |
338 | return tf->atime; |
339 | } else { |
340 | if (left) |
341 | size -= left->totalsize; |
84849cbd |
342 | size -= tf->size; |
70322ae3 |
343 | node = right; |
344 | } |
345 | } |
346 | } |