70322ae3 |
1 | /* |
2 | * trie.c: implementation of trie.h. |
3 | */ |
4 | |
353bc75d |
5 | #include "agedu.h" |
995db599 |
6 | #include "alloc.h" |
70322ae3 |
7 | #include "trie.h" |
8 | |
9 | #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) ) |
10 | |
11 | /* |
12 | * Compare functions for pathnames. Returns the relative order of |
13 | * the names, like strcmp; also passes back the offset of the |
14 | * first differing character if desired. |
15 | */ |
16 | static int trieccmp(unsigned char a, unsigned char b) |
17 | { |
2f23825b |
18 | int ia = (a == '\0' ? '\0' : a == pathsep ? '\1' : a+1); |
19 | int ib = (b == '\0' ? '\0' : b == pathsep ? '\1' : b+1); |
20 | return ia - ib; |
70322ae3 |
21 | } |
22 | |
23 | static int triencmp(const char *a, size_t alen, |
24 | const char *b, size_t blen, int *offset) |
25 | { |
26 | int off = 0; |
27 | while (off < alen && off < blen && a[off] == b[off]) |
28 | off++; |
29 | if (offset) |
30 | *offset = off; |
31 | if (off == alen || off == blen) return (off == blen) - (off == alen); |
32 | return trieccmp(a[off], b[off]); |
33 | } |
34 | |
35 | static int triecmp(const char *a, const char *b, int *offset) |
36 | { |
37 | return triencmp(a, strlen(a), b, strlen(b), offset); |
38 | } |
39 | |
40 | /* ---------------------------------------------------------------------- |
41 | * Trie node structures. |
42 | * |
43 | * The trie format stored in the file consists of three distinct |
44 | * node types, each with a distinguishing type field at the start. |
45 | * |
46 | * TRIE_LEAF is a leaf node; it contains an actual trie_file |
47 | * structure, and indicates that when you're searching down the |
48 | * trie with a string, you should now expect to encounter |
49 | * end-of-string. |
50 | * |
51 | * TRIE_SWITCH indicates that the set of strings in the trie |
52 | * include strings with more than one distinct character after the |
53 | * prefix leading up to this point. Hence, it stores multiple |
54 | * subnode pointers and a different single character for each one. |
55 | * |
56 | * TRIE_STRING indicates that at this point everything in the trie |
57 | * has the same next few characters; it stores a single mandatory |
58 | * string fragment and exactly one subnode pointer. |
59 | */ |
60 | enum { |
61 | TRIE_LEAF = 0x7fffe000, |
62 | TRIE_SWITCH, |
63 | TRIE_STRING |
64 | }; |
65 | |
66 | struct trie_common { |
67 | int type; |
68 | }; |
69 | |
70 | struct trie_switchentry { |
71 | off_t subnode; |
72 | int subcount; |
73 | }; |
74 | |
75 | struct trie_leaf { |
76 | struct trie_common c; |
77 | struct trie_file file; |
78 | }; |
79 | |
80 | struct trie_switch { |
81 | struct trie_common c; |
82 | /* |
83 | * sw[0] to sw[len-1] give the subnode pointers and element |
84 | * counts. At &sw[len] is stored len single bytes which are |
85 | * the characters corresponding to each subnode. |
86 | */ |
87 | int len; |
88 | struct trie_switchentry sw[]; |
89 | }; |
90 | |
91 | struct trie_string { |
92 | struct trie_common c; |
93 | int stringlen; |
94 | off_t subnode; |
95 | char string[]; |
96 | }; |
97 | |
bb013b1f |
98 | static const char magic_ident_string[16] = "agedu index file"; |
99 | struct trie_magic { |
100 | /* |
101 | * 'Magic numbers' to go at the start of an agedu index file. |
102 | * These are checked (using trie_check_magic) by every agedu mode |
103 | * which reads a pre-existing index. |
104 | * |
105 | * As well as identifying an agedu file from any other kind of |
106 | * file, this magic-number structure is also intended to detect |
107 | * agedu files which were written on the wrong platform and hence |
108 | * whose structure layouts are incompatible. To make that as |
109 | * reliable as possible, I design the structure of magic numbers |
110 | * as follows: it contains one of each integer type I might use, |
111 | * each containing a different magic number, and each followed by |
112 | * a char to indicate where it ends in the file. One byte is set |
113 | * to the length of the magic-number structure itself, which means |
114 | * that no two structures of different lengths can possibly |
115 | * compare equal even if by some chance they match up to the |
116 | * length of the shorter one. Finally, the whole magic number |
117 | * structure is memset to another random byte value before |
118 | * initialising any of these fields, so that padding in between |
119 | * can be readily identified. |
120 | */ |
121 | char ident[16]; /* human-readable string */ |
122 | |
123 | unsigned char magic_len; |
124 | |
125 | unsigned long long longlong_magic; |
126 | unsigned char postlonglong_char_magic; |
127 | |
128 | off_t offset_magic; |
129 | unsigned char postoffset_char_magic; |
130 | |
131 | size_t size_magic; |
132 | unsigned char postsize_char_magic; |
133 | |
134 | void *null_pointer; |
135 | unsigned char postptr_char_magic; |
136 | |
137 | unsigned long long_magic; |
138 | unsigned char postlong_char_magic; |
139 | |
140 | unsigned short short_magic; |
141 | unsigned char postshort_char_magic; |
142 | }; |
143 | |
70322ae3 |
144 | struct trie_header { |
bb013b1f |
145 | struct trie_magic magic; |
70322ae3 |
146 | off_t root, indexroot; |
147 | int count; |
148 | size_t maxpathlen; |
269fa2d1 |
149 | int pathsep; |
70322ae3 |
150 | }; |
151 | |
152 | /* Union only used for computing alignment */ |
153 | union trie_node { |
154 | struct trie_leaf leaf; |
155 | struct { /* fake trie_switch with indeterminate array length filled in */ |
156 | struct trie_common c; |
157 | int len; |
158 | struct trie_switchentry sw[1]; |
159 | } sw; |
160 | struct { /* fake trie_string with indeterminate array length filled in */ |
161 | struct trie_common c; |
162 | int stringlen; |
163 | off_t subnode; |
164 | char string[1]; |
165 | } str; |
166 | }; |
70322ae3 |
167 | #define TRIE_ALIGN alignof(union trie_node) |
168 | |
bb013b1f |
169 | static void setup_magic(struct trie_magic *th) |
170 | { |
171 | /* |
172 | * Magic values are chosen so that every byte value used is |
173 | * distinct (so that we can't fail to spot endianness issues), and |
174 | * we cast 64 bits of data into each integer type just in case |
175 | * the platform makes it longer than we expect it to be. |
176 | */ |
177 | |
178 | memset(th, 0xCDU, sizeof(*th)); |
179 | |
180 | th->magic_len = sizeof(*th); |
181 | |
182 | memcpy(th->ident, magic_ident_string, 16); |
183 | |
184 | th->longlong_magic = 0x5583F34D5D84F73CULL; |
185 | th->postlonglong_char_magic = 0xDDU; |
186 | |
187 | th->offset_magic = (off_t)0xB39BF9AD56D48E0BULL; |
188 | th->postoffset_char_magic = 0x95U; |
189 | |
190 | th->size_magic = (size_t)0x6EC752B0EEAEBAC1ULL; |
191 | th->postsize_char_magic = 0x77U; |
192 | |
193 | th->null_pointer = NULL; |
194 | th->postptr_char_magic = 0x71U; |
195 | |
196 | th->long_magic = (unsigned long)0xA81A5E1F44334716ULL; |
197 | th->postlong_char_magic = 0x99U; |
198 | |
199 | th->short_magic = (unsigned short)0x0C8BD7984B68D9FCULL; |
200 | th->postshort_char_magic = 0x35U; |
201 | } |
202 | |
70322ae3 |
203 | /* ---------------------------------------------------------------------- |
204 | * Trie-building functions. |
205 | */ |
206 | |
207 | struct tbswitch { |
208 | int len; |
209 | char c[256]; |
210 | off_t off[256]; |
211 | int count[256]; |
212 | }; |
213 | |
214 | struct triebuild { |
215 | int fd; |
216 | off_t offset; |
217 | char *lastpath; |
218 | int lastlen, lastsize; |
219 | off_t lastoff; |
220 | struct tbswitch *switches; |
221 | int switchsize; |
222 | size_t maxpathlen; |
223 | }; |
224 | |
225 | static void tb_seek(triebuild *tb, off_t off) |
226 | { |
227 | tb->offset = off; |
228 | if (lseek(tb->fd, off, SEEK_SET) < 0) { |
bf53e756 |
229 | fprintf(stderr, PNAME ": lseek: %s\n", strerror(errno)); |
70322ae3 |
230 | exit(1); |
231 | } |
232 | } |
233 | |
234 | static void tb_write(triebuild *tb, const void *buf, size_t len) |
235 | { |
236 | tb->offset += len; |
237 | while (len > 0) { |
238 | int ret = write(tb->fd, buf, len); |
239 | if (ret < 0) { |
bf53e756 |
240 | fprintf(stderr, PNAME ": write: %s\n", strerror(errno)); |
70322ae3 |
241 | exit(1); |
242 | } |
243 | len -= ret; |
244 | buf = (const void *)((const char *)buf + ret); |
245 | } |
246 | } |
247 | |
248 | static char trie_align_zeroes[TRIE_ALIGN]; |
249 | |
250 | static void tb_align(triebuild *tb) |
251 | { |
252 | int off = (TRIE_ALIGN - ((tb)->offset % TRIE_ALIGN)) % TRIE_ALIGN; |
253 | tb_write(tb, trie_align_zeroes, off); |
254 | } |
255 | |
256 | triebuild *triebuild_new(int fd) |
257 | { |
258 | triebuild *tb = snew(triebuild); |
259 | struct trie_header th; |
260 | |
261 | tb->fd = fd; |
262 | tb->lastpath = NULL; |
263 | tb->lastlen = tb->lastsize = 0; |
264 | tb->lastoff = 0; |
265 | tb->switches = NULL; |
266 | tb->switchsize = 0; |
267 | tb->maxpathlen = 0; |
268 | |
bb013b1f |
269 | setup_magic(&th.magic); |
70322ae3 |
270 | th.root = th.count = 0; |
271 | th.indexroot = 0; |
272 | th.maxpathlen = 0; |
269fa2d1 |
273 | th.pathsep = (unsigned char)pathsep; |
70322ae3 |
274 | |
275 | tb_seek(tb, 0); |
276 | tb_write(tb, &th, sizeof(th)); |
277 | |
278 | return tb; |
279 | } |
280 | |
281 | static off_t triebuild_unwind(triebuild *tb, int targetdepth, int *outcount) |
282 | { |
283 | off_t offset; |
284 | int count, depth; |
285 | |
286 | if (tb->lastoff == 0) { |
287 | *outcount = 0; |
288 | return 0; |
289 | } |
290 | |
291 | offset = tb->lastoff; |
292 | count = 1; |
293 | depth = tb->lastlen + 1; |
294 | |
295 | assert(depth >= targetdepth); |
296 | |
297 | while (depth > targetdepth) { |
298 | int odepth = depth; |
299 | while (depth > targetdepth && |
aea73b94 |
300 | (depth-1 >= tb->switchsize || !tb->switches || |
cf39b588 |
301 | tb->switches[depth-1].len == 0)) |
70322ae3 |
302 | depth--; |
303 | if (odepth > depth) { |
304 | /* |
305 | * Write out a string node. |
306 | */ |
307 | size_t nodesize = sizeof(struct trie_string) + odepth - depth; |
308 | struct trie_string *st = (struct trie_string *)smalloc(nodesize); |
309 | st->c.type = TRIE_STRING; |
310 | st->stringlen = odepth - depth; |
311 | st->subnode = offset; |
312 | memcpy(st->string, tb->lastpath + depth, odepth - depth); |
313 | tb_align(tb); |
314 | offset = tb->offset; |
315 | tb_write(tb, st, nodesize); |
316 | sfree(st); |
317 | } |
318 | |
319 | assert(depth >= targetdepth); |
320 | if (depth <= targetdepth) |
321 | break; |
322 | |
323 | /* |
324 | * Now we expect to be sitting just below a switch node. |
325 | * Add our final entry to it and write it out. |
326 | */ |
327 | depth--; |
328 | { |
329 | struct trie_switch *sw; |
330 | char *chars; |
331 | size_t nodesize; |
332 | int swlen = tb->switches[depth].len; |
333 | int i; |
334 | |
335 | assert(swlen > 0); |
336 | |
337 | tb->switches[depth].c[swlen] = tb->lastpath[depth]; |
338 | tb->switches[depth].off[swlen] = offset; |
339 | tb->switches[depth].count[swlen] = count; |
340 | swlen++; |
341 | |
342 | nodesize = sizeof(struct trie_switch) + |
343 | swlen * sizeof(struct trie_switchentry) + swlen; |
344 | sw = (struct trie_switch *)smalloc(nodesize); |
345 | chars = (char *)&sw->sw[swlen]; |
346 | |
347 | sw->c.type = TRIE_SWITCH; |
348 | sw->len = swlen; |
349 | count = 0; |
350 | for (i = 0; i < swlen; i++) { |
351 | sw->sw[i].subnode = tb->switches[depth].off[i]; |
352 | sw->sw[i].subcount = tb->switches[depth].count[i]; |
353 | chars[i] = tb->switches[depth].c[i]; |
354 | |
355 | count += tb->switches[depth].count[i]; |
356 | } |
357 | |
358 | tb_align(tb); |
359 | offset = tb->offset; |
360 | tb_write(tb, sw, nodesize); |
361 | sfree(sw); |
362 | |
363 | tb->switches[depth].len = 0; /* clear this node */ |
364 | } |
365 | } |
366 | |
367 | *outcount = count; |
368 | return offset; |
369 | } |
370 | |
371 | void triebuild_add(triebuild *tb, const char *pathname, |
372 | const struct trie_file *file) |
373 | { |
374 | int pathlen = strlen(pathname); |
375 | int depth; |
376 | |
377 | if (tb->maxpathlen < pathlen+1) |
378 | tb->maxpathlen = pathlen+1; |
379 | |
380 | if (tb->lastpath) { |
381 | off_t offset; |
382 | int count; |
383 | |
384 | /* |
385 | * Find the first differing character between this pathname |
386 | * and the previous one. |
387 | */ |
388 | int ret = triecmp(tb->lastpath, pathname, &depth); |
389 | assert(ret < 0); |
390 | |
391 | /* |
392 | * Finalise all nodes above this depth. |
393 | */ |
394 | offset = triebuild_unwind(tb, depth+1, &count); |
395 | |
396 | /* |
397 | * Add the final node we just acquired to the switch node |
398 | * at our chosen depth, creating it if it isn't already |
399 | * there. |
400 | */ |
401 | if (tb->switchsize <= depth) { |
402 | int oldsize = tb->switchsize; |
403 | tb->switchsize = depth * 3 / 2 + 64; |
404 | tb->switches = sresize(tb->switches, tb->switchsize, |
405 | struct tbswitch); |
406 | while (oldsize < tb->switchsize) |
407 | tb->switches[oldsize++].len = 0; |
408 | } |
409 | |
410 | tb->switches[depth].c[tb->switches[depth].len] = tb->lastpath[depth]; |
411 | tb->switches[depth].off[tb->switches[depth].len] = offset; |
412 | tb->switches[depth].count[tb->switches[depth].len] = count; |
413 | tb->switches[depth].len++; |
414 | } |
415 | |
416 | /* |
417 | * Write out a leaf node for the new file, and remember its |
418 | * file offset. |
419 | */ |
420 | { |
421 | struct trie_leaf leaf; |
422 | |
423 | leaf.c.type = TRIE_LEAF; |
424 | leaf.file = *file; /* structure copy */ |
425 | |
426 | tb_align(tb); |
427 | tb->lastoff = tb->offset; |
428 | tb_write(tb, &leaf, sizeof(leaf)); |
429 | } |
430 | |
431 | /* |
432 | * Store this pathname for comparison with the next one. |
433 | */ |
434 | if (tb->lastsize < pathlen+1) { |
435 | tb->lastsize = pathlen * 3 / 2 + 64; |
436 | tb->lastpath = sresize(tb->lastpath, tb->lastsize, char); |
437 | } |
438 | strcpy(tb->lastpath, pathname); |
439 | tb->lastlen = pathlen; |
440 | } |
441 | |
442 | int triebuild_finish(triebuild *tb) |
443 | { |
444 | struct trie_header th; |
445 | |
bb013b1f |
446 | setup_magic(&th.magic); |
70322ae3 |
447 | th.root = triebuild_unwind(tb, 0, &th.count); |
448 | th.indexroot = 0; |
449 | th.maxpathlen = tb->maxpathlen; |
269fa2d1 |
450 | th.pathsep = (unsigned char)pathsep; |
70322ae3 |
451 | |
452 | tb_seek(tb, 0); |
453 | tb_write(tb, &th, sizeof(th)); |
454 | |
455 | return th.count; |
456 | } |
457 | |
458 | void triebuild_free(triebuild *tb) |
459 | { |
460 | sfree(tb->switches); |
461 | sfree(tb->lastpath); |
462 | sfree(tb); |
463 | } |
464 | |
465 | /* ---------------------------------------------------------------------- |
05b0f827 |
466 | * Memory-mapped trie modification. |
467 | */ |
468 | |
469 | #define MNODE(t, off, type) \ |
470 | ((struct type *)((char *)(t) + (off))) |
471 | |
472 | static unsigned long long fake_atime_recurse(void *t, struct trie_common *node, |
473 | int last_seen_pathsep) |
474 | { |
475 | while (node->type == TRIE_STRING) { |
476 | struct trie_string *st = (struct trie_string *)node; |
477 | last_seen_pathsep = (st->string[st->stringlen-1] == pathsep); |
478 | node = MNODE(t, st->subnode, trie_common); |
479 | } |
480 | |
481 | if (node->type == TRIE_LEAF) { |
482 | struct trie_leaf *leaf = (struct trie_leaf *)node; |
483 | return leaf->file.atime; |
484 | } else if (assert(node->type == TRIE_SWITCH), 1) { |
485 | struct trie_switch *sw = (struct trie_switch *)node; |
486 | const char *chars = (const char *)&sw->sw[sw->len]; |
487 | unsigned long long max = 0, subdir, ret; |
488 | int i; |
489 | int slashindex = -1, bareindex = -1; |
490 | |
491 | /* |
492 | * First, process all the children of this node whose |
493 | * switch characters are not \0 or pathsep. We do this in |
494 | * reverse order so as to maintain best cache locality |
495 | * (tracking generally backwards through the file), though |
496 | * it doesn't matter semantically. |
497 | * |
498 | * For each of these children, we're just recursing into |
499 | * it to do any fixups required below it, and amalgamating |
500 | * the max atimes we get back. |
501 | */ |
502 | for (i = sw->len; i-- > 0 ;) { |
503 | if (chars[i] == '\0') { |
504 | bareindex = i; |
505 | } else if (chars[i] == pathsep) { |
506 | slashindex = i; |
507 | } else { |
508 | ret = fake_atime_recurse(t, MNODE(t, sw->sw[i].subnode, |
509 | trie_common), 0); |
510 | if (max < ret) |
511 | max = ret; |
512 | } |
513 | } |
514 | |
515 | /* |
516 | * Now we have at most two child nodes left to deal with: |
517 | * one with a slash (or general pathsep) and one with \0. |
518 | * |
519 | * If there's a slash node and a bare node, then the slash |
520 | * node contains precisely everything inside the directory |
521 | * described by the bare node; so we must retrieve the max |
522 | * atime for the slash node and use it to fix up the bare |
523 | * node. |
524 | * |
525 | * If there's only a bare node but the pathname leading up |
526 | * to this point ends in a slash, then _all_ of the child |
527 | * nodes of this node contain stuff inside the directory |
528 | * described by the bare node; so we use the whole of the |
529 | * maximum value we've computed so far to update the bare |
530 | * node. |
531 | */ |
532 | if (slashindex >= 0) { |
533 | ret = fake_atime_recurse(t, MNODE(t, sw->sw[slashindex].subnode, |
534 | trie_common), 1); |
535 | if (max < ret) |
536 | max = ret; |
537 | |
538 | subdir = ret; |
539 | } else if (last_seen_pathsep) { |
540 | subdir = max; |
541 | } else { |
542 | /* Don't update the bare subnode at all. */ |
543 | subdir = 0; |
544 | } |
545 | |
546 | if (bareindex >= 0) { |
547 | struct trie_leaf *leaf; |
548 | |
549 | leaf = MNODE(t, sw->sw[bareindex].subnode, trie_leaf); |
550 | |
551 | if (leaf && leaf->c.type == TRIE_LEAF) { |
552 | if (leaf->file.atime < subdir) |
553 | leaf->file.atime = subdir; |
554 | ret = leaf->file.atime; |
555 | } else { |
556 | /* Shouldn't really happen, but be cautious anyway */ |
557 | ret = fake_atime_recurse(t, &leaf->c, 0); |
558 | } |
559 | |
560 | if (max < ret) |
561 | max = ret; |
562 | } |
563 | |
564 | return max; |
565 | } |
a8d1009f |
566 | return 0; /* placate lint */ |
05b0f827 |
567 | } |
568 | |
569 | void trie_fake_dir_atimes(void *t) |
570 | { |
571 | struct trie_header *hdr = MNODE(t, 0, trie_header); |
572 | struct trie_common *node = MNODE(t, hdr->root, trie_common); |
573 | |
574 | fake_atime_recurse(t, node, 1); |
575 | } |
576 | |
577 | /* ---------------------------------------------------------------------- |
70322ae3 |
578 | * Querying functions. |
579 | */ |
580 | |
581 | #define NODE(t, off, type) \ |
582 | ((const struct type *)((const char *)(t) + (off))) |
583 | |
bb013b1f |
584 | int trie_check_magic(const void *t) |
585 | { |
586 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
587 | struct trie_magic magic; |
588 | |
589 | setup_magic(&magic); |
590 | return !memcmp(&magic, &hdr->magic, sizeof(magic)); |
591 | } |
592 | |
70322ae3 |
593 | size_t trie_maxpathlen(const void *t) |
594 | { |
595 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
596 | return hdr->maxpathlen; |
597 | } |
598 | |
599 | unsigned long trie_before(const void *t, const char *pathname) |
600 | { |
601 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
602 | int ret = 0, lastcount = hdr->count; |
603 | int len = 1 + strlen(pathname), depth = 0; |
604 | off_t off = hdr->root; |
605 | |
606 | while (1) { |
607 | const struct trie_common *node = NODE(t, off, trie_common); |
608 | if (node->type == TRIE_LEAF) { |
609 | if (depth < len) |
610 | ret += lastcount; /* _shouldn't_ happen, but in principle */ |
611 | return ret; |
612 | } else if (node->type == TRIE_STRING) { |
613 | const struct trie_string *st = NODE(t, off, trie_string); |
614 | |
615 | int offset; |
616 | int cmp = triencmp(st->string, st->stringlen, |
617 | pathname + depth, len-depth, &offset); |
618 | |
619 | if (offset < st->stringlen) { |
620 | if (cmp < 0) |
621 | ret += lastcount; |
622 | return ret; |
623 | } |
624 | |
625 | depth += st->stringlen; |
626 | off = st->subnode; |
627 | } else if (node->type == TRIE_SWITCH) { |
628 | const struct trie_switch *sw = NODE(t, off, trie_switch); |
629 | const char *chars = (const char *)&sw->sw[sw->len]; |
630 | int i; |
631 | |
632 | for (i = 0; i < sw->len; i++) { |
633 | int c = chars[i]; |
634 | int cmp = trieccmp(pathname[depth], c); |
635 | if (cmp > 0) |
636 | ret += sw->sw[i].subcount; |
637 | else if (cmp < 0) |
638 | return ret; |
639 | else { |
640 | off = sw->sw[i].subnode; |
641 | lastcount = sw->sw[i].subcount; |
642 | depth++; |
643 | break; |
644 | } |
645 | } |
646 | if (i == sw->len) |
647 | return ret; |
648 | } |
649 | } |
650 | } |
651 | |
652 | void trie_getpath(const void *t, unsigned long n, char *buf) |
653 | { |
654 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
655 | int depth = 0; |
656 | off_t off = hdr->root; |
657 | |
658 | while (1) { |
659 | const struct trie_common *node = NODE(t, off, trie_common); |
660 | if (node->type == TRIE_LEAF) { |
661 | assert(depth > 0 && buf[depth-1] == '\0'); |
662 | return; |
663 | } else if (node->type == TRIE_STRING) { |
664 | const struct trie_string *st = NODE(t, off, trie_string); |
665 | |
666 | memcpy(buf + depth, st->string, st->stringlen); |
667 | depth += st->stringlen; |
668 | off = st->subnode; |
669 | } else if (node->type == TRIE_SWITCH) { |
670 | const struct trie_switch *sw = NODE(t, off, trie_switch); |
671 | const char *chars = (const char *)&sw->sw[sw->len]; |
672 | int i; |
673 | |
674 | for (i = 0; i < sw->len; i++) { |
675 | if (n < sw->sw[i].subcount) { |
676 | buf[depth++] = chars[i]; |
677 | off = sw->sw[i].subnode; |
678 | break; |
679 | } else |
680 | n -= sw->sw[i].subcount; |
681 | } |
682 | assert(i < sw->len); |
683 | } |
684 | } |
16139d21 |
685 | } |
686 | |
687 | const struct trie_file *trie_getfile(const void *t, unsigned long n) |
688 | { |
689 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
690 | off_t off = hdr->root; |
691 | |
692 | while (1) { |
693 | const struct trie_common *node = NODE(t, off, trie_common); |
694 | if (node->type == TRIE_LEAF) { |
695 | const struct trie_leaf *leaf = NODE(t, off, trie_leaf); |
696 | return &leaf->file; |
697 | } else if (node->type == TRIE_STRING) { |
698 | const struct trie_string *st = NODE(t, off, trie_string); |
699 | off = st->subnode; |
700 | } else if (node->type == TRIE_SWITCH) { |
701 | const struct trie_switch *sw = NODE(t, off, trie_switch); |
702 | int i; |
703 | |
704 | for (i = 0; i < sw->len; i++) { |
705 | if (n < sw->sw[i].subcount) { |
706 | off = sw->sw[i].subnode; |
707 | break; |
708 | } else |
709 | n -= sw->sw[i].subcount; |
710 | } |
711 | assert(i < sw->len); |
712 | } |
713 | } |
714 | } |
70322ae3 |
715 | |
716 | unsigned long trie_count(const void *t) |
717 | { |
718 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
719 | return hdr->count; |
720 | } |
721 | |
269fa2d1 |
722 | char trie_pathsep(const void *t) |
723 | { |
724 | const struct trie_header *hdr = NODE(t, 0, trie_header); |
725 | return (char)hdr->pathsep; |
726 | } |
727 | |
70322ae3 |
728 | struct triewalk_switch { |
729 | const struct trie_switch *sw; |
730 | int pos, depth, count; |
731 | }; |
732 | struct triewalk { |
733 | const void *t; |
734 | struct triewalk_switch *switches; |
735 | int nswitches, switchsize; |
736 | int count; |
737 | }; |
738 | triewalk *triewalk_new(const void *vt) |
739 | { |
740 | triewalk *tw = snew(triewalk); |
741 | |
742 | tw->t = (const char *)vt; |
743 | tw->switches = NULL; |
744 | tw->switchsize = 0; |
745 | tw->nswitches = -1; |
746 | tw->count = 0; |
747 | |
748 | return tw; |
749 | } |
522edd92 |
750 | |
751 | void triewalk_rebase(triewalk *tw, const void *t) |
752 | { |
753 | ptrdiff_t diff = ((const unsigned char *)t - (const unsigned char *)(tw->t)); |
754 | int i; |
755 | |
756 | tw->t = t; |
757 | |
758 | for (i = 0; i < tw->nswitches; i++) |
759 | tw->switches[i].sw = (const struct trie_switch *) |
760 | ((const unsigned char *)(tw->switches[i].sw) + diff); |
761 | } |
762 | |
70322ae3 |
763 | const struct trie_file *triewalk_next(triewalk *tw, char *buf) |
764 | { |
765 | off_t off; |
766 | int depth; |
767 | |
768 | if (tw->nswitches < 0) { |
769 | const struct trie_header *hdr = NODE(tw->t, 0, trie_header); |
770 | off = hdr->root; |
771 | depth = 0; |
772 | tw->nswitches = 0; |
773 | } else { |
774 | while (1) { |
775 | int swpos; |
776 | const struct trie_switch *sw; |
777 | const char *chars; |
778 | |
779 | if (tw->nswitches == 0) { |
780 | assert(tw->count == NODE(tw->t, 0, trie_header)->count); |
781 | return NULL; /* run out of trie */ |
782 | } |
783 | |
784 | swpos = tw->switches[tw->nswitches-1].pos; |
785 | sw = tw->switches[tw->nswitches-1].sw; |
786 | chars = (const char *)&sw->sw[sw->len]; |
787 | |
788 | if (swpos < sw->len) { |
789 | depth = tw->switches[tw->nswitches-1].depth; |
790 | off = sw->sw[swpos].subnode; |
791 | if (buf) |
792 | buf[depth++] = chars[swpos]; |
793 | assert(tw->count == tw->switches[tw->nswitches-1].count); |
794 | tw->switches[tw->nswitches-1].count += sw->sw[swpos].subcount; |
795 | tw->switches[tw->nswitches-1].pos++; |
796 | break; |
797 | } |
798 | |
799 | tw->nswitches--; |
800 | } |
801 | } |
802 | |
803 | while (1) { |
804 | const struct trie_common *node = NODE(tw->t, off, trie_common); |
805 | if (node->type == TRIE_LEAF) { |
806 | const struct trie_leaf *lf = NODE(tw->t, off, trie_leaf); |
807 | if (buf) |
808 | assert(depth > 0 && buf[depth-1] == '\0'); |
809 | tw->count++; |
810 | return &lf->file; |
811 | } else if (node->type == TRIE_STRING) { |
812 | const struct trie_string *st = NODE(tw->t, off, trie_string); |
813 | |
814 | if (buf) |
815 | memcpy(buf + depth, st->string, st->stringlen); |
816 | depth += st->stringlen; |
817 | off = st->subnode; |
818 | } else if (node->type == TRIE_SWITCH) { |
819 | const struct trie_switch *sw = NODE(tw->t, off, trie_switch); |
820 | const char *chars = (const char *)&sw->sw[sw->len]; |
821 | |
822 | if (tw->nswitches >= tw->switchsize) { |
823 | tw->switchsize = tw->nswitches * 3 / 2 + 32; |
824 | tw->switches = sresize(tw->switches, tw->switchsize, |
825 | struct triewalk_switch); |
826 | } |
827 | |
828 | tw->switches[tw->nswitches].sw = sw; |
829 | tw->switches[tw->nswitches].pos = 1; |
830 | tw->switches[tw->nswitches].depth = depth; |
831 | tw->switches[tw->nswitches].count = tw->count + sw->sw[0].subcount; |
832 | off = sw->sw[0].subnode; |
833 | if (buf) |
834 | buf[depth++] = chars[0]; |
835 | tw->nswitches++; |
836 | } |
837 | } |
838 | } |
839 | void triewalk_free(triewalk *tw) |
840 | { |
841 | sfree(tw->switches); |
842 | sfree(tw); |
843 | } |
844 | |
845 | void trie_set_index_offset(void *t, off_t ptr) |
846 | { |
847 | ((struct trie_header *)t)->indexroot = ptr; |
848 | } |
849 | off_t trie_get_index_offset(const void *t) |
850 | { |
851 | return ((const struct trie_header *)t)->indexroot; |
852 | } |
256c29a2 |
853 | |
854 | void make_successor(char *pathbuf) |
855 | { |
856 | int len = strlen(pathbuf); |
857 | if (len > 0 && pathbuf[len-1] == pathsep) |
858 | len--; |
859 | pathbuf[len] = '\001'; |
860 | pathbuf[len+1] = '\0'; |
861 | } |