Various polishing: man page tweaks, --version now does something,
[sgt/agedu] / index.c
CommitLineData
70322ae3 1/*
2 * index.c: Implementation of index.h.
3 */
4
5#include <assert.h>
6#include <stdio.h>
7#include <stddef.h>
8
9#include "trie.h"
10#include "index.h"
995db599 11#include "alloc.h"
70322ae3 12
13#define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
14
15#define min(x,y) ((x)<(y) ? (x):(y))
16#define max(x,y) ((x)>(y) ? (x):(y))
17
18#define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) )
19
20struct avlnode {
21 off_t children[2], element;
22 int maxdepth; /* maximum depth of this subtree */
23 unsigned long long totalsize;
24};
25
26static int index_navlnodes(int nodecount)
27{
28 int b, c, maxdepth, ret;
29
30 /*
31 * Model the tree growing at maximum imbalance. We do this by
32 * determining the number of nodes in the most unbalanced
33 * (i.e. smallest) tree of any given depth, and stopping when
34 * that's larger than nodecount.
35 */
36 ret = 0;
37 maxdepth = 1;
38 b = 0;
39 c = 1;
40 while (b <= nodecount) {
41 int tmp;
42
43 /*
44 * A tree with at most b nodes can be at most depth
45 * maxdepth-1. A tree with more than b but at most c can
46 * be at most maxdepth. Therefore, for each tree size
47 * between b and c, we might need to adjust maxdepth+1
48 * nodes.
49 */
50 tmp = min(c, nodecount);
51 ret += (tmp - b) * maxdepth;
52
53 tmp = 1 + b + c;
54 b = c;
55 c = tmp;
56 maxdepth++;
57 }
58
59 return ret;
60}
61
62off_t index_compute_size(off_t currentsize, int nodecount)
63{
64 currentsize += PADDING(currentsize, alignof(off_t));
65 currentsize += nodecount + sizeof(off_t);
66 currentsize += PADDING(currentsize, alignof(struct avlnode));
67 currentsize += index_navlnodes(nodecount) * sizeof(struct avlnode);
68
69 return currentsize;
70}
71
72/* ----------------------------------------------------------------------
73 * Functions to build the index.
74 */
75
76struct indexbuild {
77 void *t;
78 int n, nnodes, maxnodes;
79 struct avlnode *nodes;
80 off_t *roots;
81 struct avlnode *currroot;
82 struct avlnode *firstmutable;
83};
84
85#define ELEMENT(t,offset) \
86 ((offset) ? (struct trie_file *)((char *)(t) + (offset)) : NULL)
87#define NODE(t,offset) \
88 ((offset) ? (struct avlnode *)((char *)(t) + (offset)) : NULL)
89#define OFFSET(t,node) \
90 ((node) ? (off_t)((const char *)node - (const char *)t) : 0)
91#define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0)
92
93indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount)
94{
95 indexbuild *ib = snew(indexbuild);
96
97 ib->t = t;
98 startoff += PADDING(startoff, alignof(off_t));
99 ib->roots = (off_t *)((char *)t + startoff);
100 trie_set_index_offset(t, startoff);
101 startoff += nodecount * sizeof(off_t);
102 startoff += PADDING(startoff, alignof(struct avlnode));
103 ib->nodes = (struct avlnode *)((char *)t + startoff);
104 ib->maxnodes = index_navlnodes(nodecount);
105 ib->nnodes = ib->n = 0;
106 ib->currroot = NULL;
107 ib->firstmutable = ib->nodes + ib->nnodes;
108
109 return ib;
110}
111
112/*
113 * Return a mutable node, which is n or a copy of n if n is
114 * non-NULL.
115 */
116static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n)
117{
118 struct avlnode *newnode;
119
120 if (n && n >= ib->firstmutable)
121 return n; /* already mutable */
122
123 assert(ib->nnodes < ib->maxnodes);
124 newnode = ib->nodes + ib->nnodes++;
125 if (n)
126 *newnode = *n; /* structure copy */
127 return newnode;
128}
129
130/*
131 * Fix the annotations in a tree node.
132 */
133static void avl_fix(indexbuild *ib, struct avlnode *n)
134{
135 /*
136 * Make sure the max depth field is right.
137 */
138 n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])),
139 MAXDEPTH(NODE(ib->t, n->children[1])));
140
141 n->totalsize =
84849cbd 142 (ELEMENT(ib->t, n->element)->size +
70322ae3 143 (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) +
144 (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0));
145}
146
147static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n,
148 off_t node)
149{
150 struct trie_file *newfile;
151 struct trie_file *oldfile;
152 int subtree;
153 struct avlnode *nn;
154
155 /*
156 * Recursion bottoming out: if the subtree we're inserting
157 * into is null, just construct and return a fresh node.
158 */
159 if (!n) {
160 n = avl_makemutable(ib, NULL);
161 n->children[0] = n->children[1] = 0;
162 n->element = node;
163 avl_fix(ib, n);
164 return n;
165 }
166
167 /*
168 * Otherwise, we have to insert into an existing tree.
169 */
170
171 /*
172 * Determine which subtree to insert this node into. Ties
173 * aren't important, so we just break them any old way.
174 */
175 newfile = (struct trie_file *)((char *)ib->t + node);
176 oldfile = (struct trie_file *)((char *)ib->t + n->element);
177 if (newfile->atime > oldfile->atime)
178 subtree = 1;
179 else
180 subtree = 0;
181
182 /*
183 * Construct a copy of the node we're looking at.
184 */
185 n = avl_makemutable(ib, n);
186
187 /*
188 * Recursively insert into the next subtree down.
189 */
190 nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node);
191 n->children[subtree] = OFFSET(ib->t, nn);
192
193 /*
194 * Rebalance if necessary, to ensure that our node's children
195 * differ in maximum depth by at most one. Of course, the
196 * subtree we've just modified will be the deeper one if so.
197 */
198 if (MAXDEPTH(NODE(ib->t, n->children[subtree])) >
199 MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) {
200 struct avlnode *p, *q;
201
202 /*
203 * There are two possible cases, one of which requires a
204 * single tree rotation and the other requires two. It all
205 * depends on which subtree of the next node down (here p)
206 * is the taller. (It turns out that they can't both be
207 * the same height: any tree which has just increased in
208 * depth must have one subtree strictly taller than the
209 * other.)
210 */
211 p = NODE(ib->t, n->children[subtree]);
212 assert(p >= ib->firstmutable);
213 if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >=
214 MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) {
215 /*
216 * n p
217 * / \ / \
218 * [k] p -> n [k+1]
219 * / \ / \
220 * [k] [k+1] [k] [k]
221 */
222 n->children[subtree] = p->children[1-subtree];
223 p->children[1-subtree] = OFFSET(ib->t, n);
224 avl_fix(ib, n);
225 n = p;
226 } else {
227 q = NODE(ib->t, p->children[1-subtree]);
228 assert(q >= ib->firstmutable);
229 p->children[1-subtree] = OFFSET(ib->t, q);
230 /*
231 * n n q
232 * / \ / \ / \
233 * [k] p == [k] p -> n p
234 * / \ / \ / \ / \
235 * [k+1] [k] q [k] [k] \ / [k]
236 * / \ [k-1,k] [k-1,k]
237 * [k-1,k] [k-1,k]
238 */
239 n->children[subtree] = q->children[1-subtree];
240 p->children[1-subtree] = q->children[subtree];
241 q->children[1-subtree] = OFFSET(ib->t, n);
242 q->children[subtree] = OFFSET(ib->t, p);
243 avl_fix(ib, n);
244 avl_fix(ib, p);
245 n = q;
246 }
247 }
248
249 /*
250 * Fix up our maximum depth field.
251 */
252 avl_fix(ib, n);
253
254 /*
255 * Done.
256 */
257 return n;
258}
259
260void indexbuild_add(indexbuild *ib, const struct trie_file *tf)
261{
262 off_t node = OFFSET(ib->t, tf);
263 ib->currroot = avl_insert(ib, ib->currroot, node);
264 ib->roots[ib->n++] = OFFSET(ib->t, ib->currroot);
265 ib->firstmutable = ib->nodes + ib->nnodes;
266}
267
268off_t indexbuild_realsize(indexbuild *ib)
269{
270 return OFFSET(ib->t, (ib->nodes + ib->nnodes));
271}
272
273void indexbuild_free(indexbuild *ib)
274{
275 sfree(ib);
276}
277
278unsigned long long index_query(const void *t, int n, unsigned long long at)
279{
280 const off_t *roots;
281 const struct avlnode *node;
282 unsigned long count;
283 unsigned long long ret;
284
285 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
286
287 if (n < 1)
288 return 0;
289 count = trie_count(t);
290 if (n > count)
291 n = count;
292
293 node = NODE(t, roots[n-1]);
294
295 ret = 0;
296
297 while (node) {
298 const struct trie_file *tf = ELEMENT(t, node->element);
299 const struct avlnode *left = NODE(t, node->children[0]);
300 const struct avlnode *right = NODE(t, node->children[1]);
301
302 if (at <= tf->atime) {
303 node = left;
304 } else {
305 if (left)
306 ret += left->totalsize;
84849cbd 307 ret += tf->size;
70322ae3 308 node = right;
309 }
310 }
311
312 return ret;
313}
314
315unsigned long long index_order_stat(const void *t, double f)
316{
317 const off_t *roots;
318 const struct avlnode *node;
319 unsigned long count;
320 unsigned long long size;
321
322 roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
323 count = trie_count(t);
324 node = NODE(t, roots[count-1]);
325
326 size = node->totalsize * f;
327 assert(size <= node->totalsize);
328
329 while (1) {
330 const struct trie_file *tf = ELEMENT(t, node->element);
331 const struct avlnode *left = NODE(t, node->children[0]);
332 const struct avlnode *right = NODE(t, node->children[1]);
333
334 if (left && size < left->totalsize) {
335 node = left;
336 } else if (!right ||
84849cbd 337 size < (left ? left->totalsize : 0) + tf->size) {
70322ae3 338 return tf->atime;
339 } else {
340 if (left)
341 size -= left->totalsize;
84849cbd 342 size -= tf->size;
70322ae3 343 node = right;
344 }
345 }
346}