| 1 | /* |
| 2 | * index.c: Implementation of index.h. |
| 3 | */ |
| 4 | |
| 5 | #include "agedu.h" |
| 6 | #include "trie.h" |
| 7 | #include "index.h" |
| 8 | #include "alloc.h" |
| 9 | |
| 10 | #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) ) |
| 11 | |
| 12 | #define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) ) |
| 13 | |
| 14 | struct avlnode { |
| 15 | off_t children[2], element; |
| 16 | int maxdepth; /* maximum depth of this subtree */ |
| 17 | unsigned long long totalsize; |
| 18 | }; |
| 19 | |
| 20 | /* |
| 21 | * Determine the maximum depth of an AVL tree containing a certain |
| 22 | * number of nodes. |
| 23 | */ |
| 24 | static int index_maxdepth(int nodecount) |
| 25 | { |
| 26 | int b, c, maxdepth; |
| 27 | |
| 28 | /* |
| 29 | * Model the tree growing at maximum imbalance. We do this by |
| 30 | * determining the number of nodes in the most unbalanced |
| 31 | * (i.e. smallest) tree of any given depth, and stopping when |
| 32 | * that's larger than nodecount. |
| 33 | */ |
| 34 | maxdepth = 1; |
| 35 | b = 0; |
| 36 | c = 1; |
| 37 | while (b <= nodecount) { |
| 38 | int tmp; |
| 39 | |
| 40 | tmp = 1 + b + c; |
| 41 | b = c; |
| 42 | c = tmp; |
| 43 | maxdepth++; |
| 44 | } |
| 45 | |
| 46 | return maxdepth; |
| 47 | } |
| 48 | |
| 49 | off_t index_initial_size(off_t currentsize, int nodecount) |
| 50 | { |
| 51 | currentsize += PADDING(currentsize, alignof(off_t)); |
| 52 | currentsize += nodecount * sizeof(off_t); |
| 53 | currentsize += PADDING(currentsize, alignof(struct avlnode)); |
| 54 | |
| 55 | return currentsize; |
| 56 | } |
| 57 | |
| 58 | /* ---------------------------------------------------------------------- |
| 59 | * Functions to build the index. |
| 60 | */ |
| 61 | |
| 62 | struct indexbuild { |
| 63 | void *t; |
| 64 | int n, nnodes; |
| 65 | struct avlnode *nodes; |
| 66 | off_t *roots; |
| 67 | struct avlnode *currroot; |
| 68 | struct avlnode *firstmutable; |
| 69 | }; |
| 70 | |
| 71 | #define ELEMENT(t,offset) \ |
| 72 | ((struct trie_file *) ((offset) ? ((char *)(t) + (offset)) : NULL)) |
| 73 | #define NODE(t,offset) \ |
| 74 | ((struct avlnode *) ((offset) ? ((char *)(t) + (offset)) : NULL)) |
| 75 | #define OFFSET(t,node) \ |
| 76 | ((node) ? (off_t)((const char *)node - (const char *)t) : (off_t)0) |
| 77 | #define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0) |
| 78 | |
| 79 | indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount, |
| 80 | size_t *delta) |
| 81 | { |
| 82 | indexbuild *ib = snew(indexbuild); |
| 83 | |
| 84 | ib->t = t; |
| 85 | startoff += PADDING(startoff, alignof(off_t)); |
| 86 | ib->roots = (off_t *)((char *)t + startoff); |
| 87 | trie_set_index_offset(t, startoff); |
| 88 | startoff += nodecount * sizeof(off_t); |
| 89 | startoff += PADDING(startoff, alignof(struct avlnode)); |
| 90 | ib->nodes = (struct avlnode *)((char *)t + startoff); |
| 91 | ib->nnodes = ib->n = 0; |
| 92 | ib->currroot = NULL; |
| 93 | ib->firstmutable = ib->nodes + ib->nnodes; |
| 94 | |
| 95 | if (delta) |
| 96 | *delta = sizeof(struct avlnode) * (1 + index_maxdepth(nodecount)); |
| 97 | |
| 98 | return ib; |
| 99 | } |
| 100 | |
| 101 | /* |
| 102 | * Return a mutable node, which is n or a copy of n if n is |
| 103 | * non-NULL. |
| 104 | */ |
| 105 | static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n) |
| 106 | { |
| 107 | struct avlnode *newnode; |
| 108 | |
| 109 | if (n && n >= ib->firstmutable) |
| 110 | return n; /* already mutable */ |
| 111 | |
| 112 | newnode = ib->nodes + ib->nnodes++; |
| 113 | if (n) |
| 114 | *newnode = *n; /* structure copy */ |
| 115 | return newnode; |
| 116 | } |
| 117 | |
| 118 | /* |
| 119 | * Fix the annotations in a tree node. |
| 120 | */ |
| 121 | static void avl_fix(indexbuild *ib, struct avlnode *n) |
| 122 | { |
| 123 | /* |
| 124 | * Make sure the max depth field is right. |
| 125 | */ |
| 126 | n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])), |
| 127 | MAXDEPTH(NODE(ib->t, n->children[1]))); |
| 128 | |
| 129 | n->totalsize = |
| 130 | (ELEMENT(ib->t, n->element)->size + |
| 131 | (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) + |
| 132 | (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0)); |
| 133 | } |
| 134 | |
| 135 | static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n, |
| 136 | off_t node) |
| 137 | { |
| 138 | struct trie_file *newfile; |
| 139 | struct trie_file *oldfile; |
| 140 | int subtree; |
| 141 | struct avlnode *nn; |
| 142 | |
| 143 | /* |
| 144 | * Recursion bottoming out: if the subtree we're inserting |
| 145 | * into is null, just construct and return a fresh node. |
| 146 | */ |
| 147 | if (!n) { |
| 148 | n = avl_makemutable(ib, NULL); |
| 149 | n->children[0] = n->children[1] = 0; |
| 150 | n->element = node; |
| 151 | avl_fix(ib, n); |
| 152 | return n; |
| 153 | } |
| 154 | |
| 155 | /* |
| 156 | * Otherwise, we have to insert into an existing tree. |
| 157 | */ |
| 158 | |
| 159 | /* |
| 160 | * Determine which subtree to insert this node into. Ties |
| 161 | * aren't important, so we just break them any old way. |
| 162 | */ |
| 163 | newfile = (struct trie_file *)((char *)ib->t + node); |
| 164 | oldfile = (struct trie_file *)((char *)ib->t + n->element); |
| 165 | if (newfile->atime > oldfile->atime) |
| 166 | subtree = 1; |
| 167 | else |
| 168 | subtree = 0; |
| 169 | |
| 170 | /* |
| 171 | * Construct a copy of the node we're looking at. |
| 172 | */ |
| 173 | n = avl_makemutable(ib, n); |
| 174 | |
| 175 | /* |
| 176 | * Recursively insert into the next subtree down. |
| 177 | */ |
| 178 | nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node); |
| 179 | n->children[subtree] = OFFSET(ib->t, nn); |
| 180 | |
| 181 | /* |
| 182 | * Rebalance if necessary, to ensure that our node's children |
| 183 | * differ in maximum depth by at most one. Of course, the |
| 184 | * subtree we've just modified will be the deeper one if so. |
| 185 | */ |
| 186 | if (MAXDEPTH(NODE(ib->t, n->children[subtree])) > |
| 187 | MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) { |
| 188 | struct avlnode *p, *q; |
| 189 | |
| 190 | /* |
| 191 | * There are two possible cases, one of which requires a |
| 192 | * single tree rotation and the other requires two. It all |
| 193 | * depends on which subtree of the next node down (here p) |
| 194 | * is the taller. (It turns out that they can't both be |
| 195 | * the same height: any tree which has just increased in |
| 196 | * depth must have one subtree strictly taller than the |
| 197 | * other.) |
| 198 | */ |
| 199 | p = NODE(ib->t, n->children[subtree]); |
| 200 | assert(p >= ib->firstmutable); |
| 201 | if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >= |
| 202 | MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) { |
| 203 | /* |
| 204 | * n p |
| 205 | * / \ / \ |
| 206 | * [k] p -> n [k+1] |
| 207 | * / \ / \ |
| 208 | * [k] [k+1] [k] [k] |
| 209 | */ |
| 210 | n->children[subtree] = p->children[1-subtree]; |
| 211 | p->children[1-subtree] = OFFSET(ib->t, n); |
| 212 | avl_fix(ib, n); |
| 213 | n = p; |
| 214 | } else { |
| 215 | q = NODE(ib->t, p->children[1-subtree]); |
| 216 | assert(q >= ib->firstmutable); |
| 217 | p->children[1-subtree] = OFFSET(ib->t, q); |
| 218 | /* |
| 219 | * n n q |
| 220 | * / \ / \ / \ |
| 221 | * [k] p == [k] p -> n p |
| 222 | * / \ / \ / \ / \ |
| 223 | * [k+1] [k] q [k] [k] \ / [k] |
| 224 | * / \ [k-1,k] [k-1,k] |
| 225 | * [k-1,k] [k-1,k] |
| 226 | */ |
| 227 | n->children[subtree] = q->children[1-subtree]; |
| 228 | p->children[1-subtree] = q->children[subtree]; |
| 229 | q->children[1-subtree] = OFFSET(ib->t, n); |
| 230 | q->children[subtree] = OFFSET(ib->t, p); |
| 231 | avl_fix(ib, n); |
| 232 | avl_fix(ib, p); |
| 233 | n = q; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | /* |
| 238 | * Fix up our maximum depth field. |
| 239 | */ |
| 240 | avl_fix(ib, n); |
| 241 | |
| 242 | /* |
| 243 | * Done. |
| 244 | */ |
| 245 | return n; |
| 246 | } |
| 247 | |
| 248 | void indexbuild_add(indexbuild *ib, const struct trie_file *tf) |
| 249 | { |
| 250 | off_t node = OFFSET(ib->t, tf); |
| 251 | ib->currroot = avl_insert(ib, ib->currroot, node); |
| 252 | ib->roots[ib->n++] = 0; |
| 253 | } |
| 254 | |
| 255 | void indexbuild_tag(indexbuild *ib) |
| 256 | { |
| 257 | if (ib->n > 0) |
| 258 | ib->roots[ib->n - 1] = OFFSET(ib->t, ib->currroot); |
| 259 | ib->firstmutable = ib->nodes + ib->nnodes; |
| 260 | } |
| 261 | |
| 262 | void indexbuild_rebase(indexbuild *ib, void *t) |
| 263 | { |
| 264 | ptrdiff_t diff = (unsigned char *)t - (unsigned char *)(ib->t); |
| 265 | |
| 266 | ib->t = t; |
| 267 | ib->nodes = (struct avlnode *)((unsigned char *)ib->nodes + diff); |
| 268 | ib->roots = (off_t *)((unsigned char *)ib->roots + diff); |
| 269 | if (ib->currroot) |
| 270 | ib->currroot = (struct avlnode *) |
| 271 | ((unsigned char *)ib->currroot + diff); |
| 272 | ib->firstmutable = (struct avlnode *)((unsigned char *)ib->firstmutable + diff); |
| 273 | } |
| 274 | |
| 275 | off_t indexbuild_realsize(indexbuild *ib) |
| 276 | { |
| 277 | return OFFSET(ib->t, (ib->nodes + ib->nnodes)); |
| 278 | } |
| 279 | |
| 280 | void indexbuild_free(indexbuild *ib) |
| 281 | { |
| 282 | assert(ib->n == trie_count(ib->t)); |
| 283 | sfree(ib); |
| 284 | } |
| 285 | |
| 286 | int index_has_root(const void *t, int n) |
| 287 | { |
| 288 | const off_t *roots; |
| 289 | |
| 290 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
| 291 | |
| 292 | if (n == 0) |
| 293 | return 1; |
| 294 | if (n < 0 || n >= trie_count(t) || !roots[n-1]) |
| 295 | return 0; |
| 296 | return 1; |
| 297 | } |
| 298 | |
| 299 | unsigned long long index_query(const void *t, int n, unsigned long long at) |
| 300 | { |
| 301 | const off_t *roots; |
| 302 | const struct avlnode *node; |
| 303 | unsigned long count; |
| 304 | unsigned long long ret; |
| 305 | |
| 306 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
| 307 | |
| 308 | if (n < 1) |
| 309 | return 0; |
| 310 | count = trie_count(t); |
| 311 | if (n > count) |
| 312 | n = count; |
| 313 | |
| 314 | assert(roots[n-1]); |
| 315 | node = NODE(t, roots[n-1]); |
| 316 | |
| 317 | ret = 0; |
| 318 | |
| 319 | while (node) { |
| 320 | const struct trie_file *tf = ELEMENT(t, node->element); |
| 321 | const struct avlnode *left = NODE(t, node->children[0]); |
| 322 | const struct avlnode *right = NODE(t, node->children[1]); |
| 323 | |
| 324 | if (at <= tf->atime) { |
| 325 | node = left; |
| 326 | } else { |
| 327 | if (left) |
| 328 | ret += left->totalsize; |
| 329 | ret += tf->size; |
| 330 | node = right; |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | return ret; |
| 335 | } |
| 336 | |
| 337 | unsigned long long index_order_stat(const void *t, double f) |
| 338 | { |
| 339 | const off_t *roots; |
| 340 | const struct avlnode *node; |
| 341 | unsigned long count; |
| 342 | unsigned long long size; |
| 343 | |
| 344 | roots = (const off_t *)((const char *)t + trie_get_index_offset(t)); |
| 345 | count = trie_count(t); |
| 346 | assert(roots[count-1]); |
| 347 | node = NODE(t, roots[count-1]); |
| 348 | |
| 349 | size = node->totalsize * f; |
| 350 | assert(size <= node->totalsize); |
| 351 | |
| 352 | while (1) { |
| 353 | const struct trie_file *tf = ELEMENT(t, node->element); |
| 354 | const struct avlnode *left = NODE(t, node->children[0]); |
| 355 | const struct avlnode *right = NODE(t, node->children[1]); |
| 356 | |
| 357 | if (left && size < left->totalsize) { |
| 358 | node = left; |
| 359 | } else if (!right || |
| 360 | size < (left ? left->totalsize : 0) + tf->size) { |
| 361 | return tf->atime; |
| 362 | } else { |
| 363 | if (left) |
| 364 | size -= left->totalsize; |
| 365 | size -= tf->size; |
| 366 | node = right; |
| 367 | } |
| 368 | } |
| 369 | } |