Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / trie.c
1 /*
2 * trie.c: implementation of trie.h.
3 */
4
5 #include "agedu.h"
6 #include "alloc.h"
7 #include "trie.h"
8
9 #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
10
11 /*
12 * Compare functions for pathnames. Returns the relative order of
13 * the names, like strcmp; also passes back the offset of the
14 * first differing character if desired.
15 */
16 static int trieccmp(unsigned char a, unsigned char b)
17 {
18 int ia = (a == '\0' ? '\0' : a == pathsep ? '\1' : a+1);
19 int ib = (b == '\0' ? '\0' : b == pathsep ? '\1' : b+1);
20 return ia - ib;
21 }
22
23 static int triencmp(const char *a, size_t alen,
24 const char *b, size_t blen, int *offset)
25 {
26 int off = 0;
27 while (off < alen && off < blen && a[off] == b[off])
28 off++;
29 if (offset)
30 *offset = off;
31 if (off == alen || off == blen) return (off == blen) - (off == alen);
32 return trieccmp(a[off], b[off]);
33 }
34
35 static int triecmp(const char *a, const char *b, int *offset)
36 {
37 return triencmp(a, strlen(a), b, strlen(b), offset);
38 }
39
40 /* ----------------------------------------------------------------------
41 * Trie node structures.
42 *
43 * The trie format stored in the file consists of three distinct
44 * node types, each with a distinguishing type field at the start.
45 *
46 * TRIE_LEAF is a leaf node; it contains an actual trie_file
47 * structure, and indicates that when you're searching down the
48 * trie with a string, you should now expect to encounter
49 * end-of-string.
50 *
51 * TRIE_SWITCH indicates that the set of strings in the trie
52 * include strings with more than one distinct character after the
53 * prefix leading up to this point. Hence, it stores multiple
54 * subnode pointers and a different single character for each one.
55 *
56 * TRIE_STRING indicates that at this point everything in the trie
57 * has the same next few characters; it stores a single mandatory
58 * string fragment and exactly one subnode pointer.
59 */
60 enum {
61 TRIE_LEAF = 0x7fffe000,
62 TRIE_SWITCH,
63 TRIE_STRING
64 };
65
66 struct trie_common {
67 int type;
68 };
69
70 struct trie_switchentry {
71 off_t subnode;
72 int subcount;
73 };
74
75 struct trie_leaf {
76 struct trie_common c;
77 struct trie_file file;
78 };
79
80 struct trie_switch {
81 struct trie_common c;
82 /*
83 * sw[0] to sw[len-1] give the subnode pointers and element
84 * counts. At &sw[len] is stored len single bytes which are
85 * the characters corresponding to each subnode.
86 */
87 int len;
88 struct trie_switchentry sw[];
89 };
90
91 struct trie_string {
92 struct trie_common c;
93 int stringlen;
94 off_t subnode;
95 char string[];
96 };
97
98 static const char magic_ident_string[16] = "agedu index file";
99 struct trie_magic {
100 /*
101 * 'Magic numbers' to go at the start of an agedu index file.
102 * These are checked (using trie_check_magic) by every agedu mode
103 * which reads a pre-existing index.
104 *
105 * As well as identifying an agedu file from any other kind of
106 * file, this magic-number structure is also intended to detect
107 * agedu files which were written on the wrong platform and hence
108 * whose structure layouts are incompatible. To make that as
109 * reliable as possible, I design the structure of magic numbers
110 * as follows: it contains one of each integer type I might use,
111 * each containing a different magic number, and each followed by
112 * a char to indicate where it ends in the file. One byte is set
113 * to the length of the magic-number structure itself, which means
114 * that no two structures of different lengths can possibly
115 * compare equal even if by some chance they match up to the
116 * length of the shorter one. Finally, the whole magic number
117 * structure is memset to another random byte value before
118 * initialising any of these fields, so that padding in between
119 * can be readily identified.
120 */
121 char ident[16]; /* human-readable string */
122
123 unsigned char magic_len;
124
125 unsigned long long longlong_magic;
126 unsigned char postlonglong_char_magic;
127
128 off_t offset_magic;
129 unsigned char postoffset_char_magic;
130
131 size_t size_magic;
132 unsigned char postsize_char_magic;
133
134 void *null_pointer;
135 unsigned char postptr_char_magic;
136
137 unsigned long long_magic;
138 unsigned char postlong_char_magic;
139
140 unsigned short short_magic;
141 unsigned char postshort_char_magic;
142 };
143
144 struct trie_header {
145 struct trie_magic magic;
146 off_t root, indexroot;
147 int count;
148 size_t maxpathlen;
149 int pathsep;
150 };
151
152 /* Union only used for computing alignment */
153 union trie_node {
154 struct trie_leaf leaf;
155 struct { /* fake trie_switch with indeterminate array length filled in */
156 struct trie_common c;
157 int len;
158 struct trie_switchentry sw[1];
159 } sw;
160 struct { /* fake trie_string with indeterminate array length filled in */
161 struct trie_common c;
162 int stringlen;
163 off_t subnode;
164 char string[1];
165 } str;
166 };
167 #define TRIE_ALIGN alignof(union trie_node)
168
169 static void setup_magic(struct trie_magic *th)
170 {
171 /*
172 * Magic values are chosen so that every byte value used is
173 * distinct (so that we can't fail to spot endianness issues), and
174 * we cast 64 bits of data into each integer type just in case
175 * the platform makes it longer than we expect it to be.
176 */
177
178 memset(th, 0xCDU, sizeof(*th));
179
180 th->magic_len = sizeof(*th);
181
182 memcpy(th->ident, magic_ident_string, 16);
183
184 th->longlong_magic = 0x5583F34D5D84F73CULL;
185 th->postlonglong_char_magic = 0xDDU;
186
187 th->offset_magic = (off_t)0xB39BF9AD56D48E0BULL;
188 th->postoffset_char_magic = 0x95U;
189
190 th->size_magic = (size_t)0x6EC752B0EEAEBAC1ULL;
191 th->postsize_char_magic = 0x77U;
192
193 th->null_pointer = NULL;
194 th->postptr_char_magic = 0x71U;
195
196 th->long_magic = (unsigned long)0xA81A5E1F44334716ULL;
197 th->postlong_char_magic = 0x99U;
198
199 th->short_magic = (unsigned short)0x0C8BD7984B68D9FCULL;
200 th->postshort_char_magic = 0x35U;
201 }
202
203 /* ----------------------------------------------------------------------
204 * Trie-building functions.
205 */
206
207 struct tbswitch {
208 int len;
209 char c[256];
210 off_t off[256];
211 int count[256];
212 };
213
214 struct triebuild {
215 int fd;
216 off_t offset;
217 char *lastpath;
218 int lastlen, lastsize;
219 off_t lastoff;
220 struct tbswitch *switches;
221 int switchsize;
222 size_t maxpathlen;
223 };
224
225 static void tb_seek(triebuild *tb, off_t off)
226 {
227 tb->offset = off;
228 if (lseek(tb->fd, off, SEEK_SET) < 0) {
229 fprintf(stderr, PNAME ": lseek: %s\n", strerror(errno));
230 exit(1);
231 }
232 }
233
234 static void tb_write(triebuild *tb, const void *buf, size_t len)
235 {
236 tb->offset += len;
237 while (len > 0) {
238 int ret = write(tb->fd, buf, len);
239 if (ret < 0) {
240 fprintf(stderr, PNAME ": write: %s\n", strerror(errno));
241 exit(1);
242 }
243 len -= ret;
244 buf = (const void *)((const char *)buf + ret);
245 }
246 }
247
248 static char trie_align_zeroes[TRIE_ALIGN];
249
250 static void tb_align(triebuild *tb)
251 {
252 int off = (TRIE_ALIGN - ((tb)->offset % TRIE_ALIGN)) % TRIE_ALIGN;
253 tb_write(tb, trie_align_zeroes, off);
254 }
255
256 triebuild *triebuild_new(int fd)
257 {
258 triebuild *tb = snew(triebuild);
259 struct trie_header th;
260
261 tb->fd = fd;
262 tb->lastpath = NULL;
263 tb->lastlen = tb->lastsize = 0;
264 tb->lastoff = 0;
265 tb->switches = NULL;
266 tb->switchsize = 0;
267 tb->maxpathlen = 0;
268
269 setup_magic(&th.magic);
270 th.root = th.count = 0;
271 th.indexroot = 0;
272 th.maxpathlen = 0;
273 th.pathsep = (unsigned char)pathsep;
274
275 tb_seek(tb, 0);
276 tb_write(tb, &th, sizeof(th));
277
278 return tb;
279 }
280
281 static off_t triebuild_unwind(triebuild *tb, int targetdepth, int *outcount)
282 {
283 off_t offset;
284 int count, depth;
285
286 if (tb->lastoff == 0) {
287 *outcount = 0;
288 return 0;
289 }
290
291 offset = tb->lastoff;
292 count = 1;
293 depth = tb->lastlen + 1;
294
295 assert(depth >= targetdepth);
296
297 while (depth > targetdepth) {
298 int odepth = depth;
299 while (depth > targetdepth &&
300 (depth-1 >= tb->switchsize || !tb->switches ||
301 tb->switches[depth-1].len == 0))
302 depth--;
303 if (odepth > depth) {
304 /*
305 * Write out a string node.
306 */
307 size_t nodesize = sizeof(struct trie_string) + odepth - depth;
308 struct trie_string *st = (struct trie_string *)smalloc(nodesize);
309 st->c.type = TRIE_STRING;
310 st->stringlen = odepth - depth;
311 st->subnode = offset;
312 memcpy(st->string, tb->lastpath + depth, odepth - depth);
313 tb_align(tb);
314 offset = tb->offset;
315 tb_write(tb, st, nodesize);
316 sfree(st);
317 }
318
319 assert(depth >= targetdepth);
320 if (depth <= targetdepth)
321 break;
322
323 /*
324 * Now we expect to be sitting just below a switch node.
325 * Add our final entry to it and write it out.
326 */
327 depth--;
328 {
329 struct trie_switch *sw;
330 char *chars;
331 size_t nodesize;
332 int swlen = tb->switches[depth].len;
333 int i;
334
335 assert(swlen > 0);
336
337 tb->switches[depth].c[swlen] = tb->lastpath[depth];
338 tb->switches[depth].off[swlen] = offset;
339 tb->switches[depth].count[swlen] = count;
340 swlen++;
341
342 nodesize = sizeof(struct trie_switch) +
343 swlen * sizeof(struct trie_switchentry) + swlen;
344 sw = (struct trie_switch *)smalloc(nodesize);
345 chars = (char *)&sw->sw[swlen];
346
347 sw->c.type = TRIE_SWITCH;
348 sw->len = swlen;
349 count = 0;
350 for (i = 0; i < swlen; i++) {
351 sw->sw[i].subnode = tb->switches[depth].off[i];
352 sw->sw[i].subcount = tb->switches[depth].count[i];
353 chars[i] = tb->switches[depth].c[i];
354
355 count += tb->switches[depth].count[i];
356 }
357
358 tb_align(tb);
359 offset = tb->offset;
360 tb_write(tb, sw, nodesize);
361 sfree(sw);
362
363 tb->switches[depth].len = 0; /* clear this node */
364 }
365 }
366
367 *outcount = count;
368 return offset;
369 }
370
371 void triebuild_add(triebuild *tb, const char *pathname,
372 const struct trie_file *file)
373 {
374 int pathlen = strlen(pathname);
375 int depth;
376
377 if (tb->maxpathlen < pathlen+1)
378 tb->maxpathlen = pathlen+1;
379
380 if (tb->lastpath) {
381 off_t offset;
382 int count;
383
384 /*
385 * Find the first differing character between this pathname
386 * and the previous one.
387 */
388 int ret = triecmp(tb->lastpath, pathname, &depth);
389 assert(ret < 0);
390
391 /*
392 * Finalise all nodes above this depth.
393 */
394 offset = triebuild_unwind(tb, depth+1, &count);
395
396 /*
397 * Add the final node we just acquired to the switch node
398 * at our chosen depth, creating it if it isn't already
399 * there.
400 */
401 if (tb->switchsize <= depth) {
402 int oldsize = tb->switchsize;
403 tb->switchsize = depth * 3 / 2 + 64;
404 tb->switches = sresize(tb->switches, tb->switchsize,
405 struct tbswitch);
406 while (oldsize < tb->switchsize)
407 tb->switches[oldsize++].len = 0;
408 }
409
410 tb->switches[depth].c[tb->switches[depth].len] = tb->lastpath[depth];
411 tb->switches[depth].off[tb->switches[depth].len] = offset;
412 tb->switches[depth].count[tb->switches[depth].len] = count;
413 tb->switches[depth].len++;
414 }
415
416 /*
417 * Write out a leaf node for the new file, and remember its
418 * file offset.
419 */
420 {
421 struct trie_leaf leaf;
422
423 leaf.c.type = TRIE_LEAF;
424 leaf.file = *file; /* structure copy */
425
426 tb_align(tb);
427 tb->lastoff = tb->offset;
428 tb_write(tb, &leaf, sizeof(leaf));
429 }
430
431 /*
432 * Store this pathname for comparison with the next one.
433 */
434 if (tb->lastsize < pathlen+1) {
435 tb->lastsize = pathlen * 3 / 2 + 64;
436 tb->lastpath = sresize(tb->lastpath, tb->lastsize, char);
437 }
438 strcpy(tb->lastpath, pathname);
439 tb->lastlen = pathlen;
440 }
441
442 int triebuild_finish(triebuild *tb)
443 {
444 struct trie_header th;
445
446 setup_magic(&th.magic);
447 th.root = triebuild_unwind(tb, 0, &th.count);
448 th.indexroot = 0;
449 th.maxpathlen = tb->maxpathlen;
450 th.pathsep = (unsigned char)pathsep;
451
452 tb_seek(tb, 0);
453 tb_write(tb, &th, sizeof(th));
454
455 return th.count;
456 }
457
458 void triebuild_free(triebuild *tb)
459 {
460 sfree(tb->switches);
461 sfree(tb->lastpath);
462 sfree(tb);
463 }
464
465 /* ----------------------------------------------------------------------
466 * Memory-mapped trie modification.
467 */
468
469 #define MNODE(t, off, type) \
470 ((struct type *)((char *)(t) + (off)))
471
472 static unsigned long long fake_atime_recurse(void *t, struct trie_common *node,
473 int last_seen_pathsep)
474 {
475 while (node->type == TRIE_STRING) {
476 struct trie_string *st = (struct trie_string *)node;
477 last_seen_pathsep = (st->string[st->stringlen-1] == pathsep);
478 node = MNODE(t, st->subnode, trie_common);
479 }
480
481 if (node->type == TRIE_LEAF) {
482 struct trie_leaf *leaf = (struct trie_leaf *)node;
483 return leaf->file.atime;
484 } else if (assert(node->type == TRIE_SWITCH), 1) {
485 struct trie_switch *sw = (struct trie_switch *)node;
486 const char *chars = (const char *)&sw->sw[sw->len];
487 unsigned long long max = 0, subdir, ret;
488 int i;
489 int slashindex = -1, bareindex = -1;
490
491 /*
492 * First, process all the children of this node whose
493 * switch characters are not \0 or pathsep. We do this in
494 * reverse order so as to maintain best cache locality
495 * (tracking generally backwards through the file), though
496 * it doesn't matter semantically.
497 *
498 * For each of these children, we're just recursing into
499 * it to do any fixups required below it, and amalgamating
500 * the max atimes we get back.
501 */
502 for (i = sw->len; i-- > 0 ;) {
503 if (chars[i] == '\0') {
504 bareindex = i;
505 } else if (chars[i] == pathsep) {
506 slashindex = i;
507 } else {
508 ret = fake_atime_recurse(t, MNODE(t, sw->sw[i].subnode,
509 trie_common), 0);
510 if (max < ret)
511 max = ret;
512 }
513 }
514
515 /*
516 * Now we have at most two child nodes left to deal with:
517 * one with a slash (or general pathsep) and one with \0.
518 *
519 * If there's a slash node and a bare node, then the slash
520 * node contains precisely everything inside the directory
521 * described by the bare node; so we must retrieve the max
522 * atime for the slash node and use it to fix up the bare
523 * node.
524 *
525 * If there's only a bare node but the pathname leading up
526 * to this point ends in a slash, then _all_ of the child
527 * nodes of this node contain stuff inside the directory
528 * described by the bare node; so we use the whole of the
529 * maximum value we've computed so far to update the bare
530 * node.
531 */
532 if (slashindex >= 0) {
533 ret = fake_atime_recurse(t, MNODE(t, sw->sw[slashindex].subnode,
534 trie_common), 1);
535 if (max < ret)
536 max = ret;
537
538 subdir = ret;
539 } else if (last_seen_pathsep) {
540 subdir = max;
541 } else {
542 /* Don't update the bare subnode at all. */
543 subdir = 0;
544 }
545
546 if (bareindex >= 0) {
547 struct trie_leaf *leaf;
548
549 leaf = MNODE(t, sw->sw[bareindex].subnode, trie_leaf);
550
551 if (leaf && leaf->c.type == TRIE_LEAF) {
552 if (leaf->file.atime < subdir)
553 leaf->file.atime = subdir;
554 ret = leaf->file.atime;
555 } else {
556 /* Shouldn't really happen, but be cautious anyway */
557 ret = fake_atime_recurse(t, &leaf->c, 0);
558 }
559
560 if (max < ret)
561 max = ret;
562 }
563
564 return max;
565 }
566 return 0; /* placate lint */
567 }
568
569 void trie_fake_dir_atimes(void *t)
570 {
571 struct trie_header *hdr = MNODE(t, 0, trie_header);
572 struct trie_common *node = MNODE(t, hdr->root, trie_common);
573
574 fake_atime_recurse(t, node, 1);
575 }
576
577 /* ----------------------------------------------------------------------
578 * Querying functions.
579 */
580
581 #define NODE(t, off, type) \
582 ((const struct type *)((const char *)(t) + (off)))
583
584 int trie_check_magic(const void *t)
585 {
586 const struct trie_header *hdr = NODE(t, 0, trie_header);
587 struct trie_magic magic;
588
589 setup_magic(&magic);
590 return !memcmp(&magic, &hdr->magic, sizeof(magic));
591 }
592
593 size_t trie_maxpathlen(const void *t)
594 {
595 const struct trie_header *hdr = NODE(t, 0, trie_header);
596 return hdr->maxpathlen;
597 }
598
599 unsigned long trie_before(const void *t, const char *pathname)
600 {
601 const struct trie_header *hdr = NODE(t, 0, trie_header);
602 int ret = 0, lastcount = hdr->count;
603 int len = 1 + strlen(pathname), depth = 0;
604 off_t off = hdr->root;
605
606 while (1) {
607 const struct trie_common *node = NODE(t, off, trie_common);
608 if (node->type == TRIE_LEAF) {
609 if (depth < len)
610 ret += lastcount; /* _shouldn't_ happen, but in principle */
611 return ret;
612 } else if (node->type == TRIE_STRING) {
613 const struct trie_string *st = NODE(t, off, trie_string);
614
615 int offset;
616 int cmp = triencmp(st->string, st->stringlen,
617 pathname + depth, len-depth, &offset);
618
619 if (offset < st->stringlen) {
620 if (cmp < 0)
621 ret += lastcount;
622 return ret;
623 }
624
625 depth += st->stringlen;
626 off = st->subnode;
627 } else if (node->type == TRIE_SWITCH) {
628 const struct trie_switch *sw = NODE(t, off, trie_switch);
629 const char *chars = (const char *)&sw->sw[sw->len];
630 int i;
631
632 for (i = 0; i < sw->len; i++) {
633 int c = chars[i];
634 int cmp = trieccmp(pathname[depth], c);
635 if (cmp > 0)
636 ret += sw->sw[i].subcount;
637 else if (cmp < 0)
638 return ret;
639 else {
640 off = sw->sw[i].subnode;
641 lastcount = sw->sw[i].subcount;
642 depth++;
643 break;
644 }
645 }
646 if (i == sw->len)
647 return ret;
648 }
649 }
650 }
651
652 void trie_getpath(const void *t, unsigned long n, char *buf)
653 {
654 const struct trie_header *hdr = NODE(t, 0, trie_header);
655 int depth = 0;
656 off_t off = hdr->root;
657
658 while (1) {
659 const struct trie_common *node = NODE(t, off, trie_common);
660 if (node->type == TRIE_LEAF) {
661 assert(depth > 0 && buf[depth-1] == '\0');
662 return;
663 } else if (node->type == TRIE_STRING) {
664 const struct trie_string *st = NODE(t, off, trie_string);
665
666 memcpy(buf + depth, st->string, st->stringlen);
667 depth += st->stringlen;
668 off = st->subnode;
669 } else if (node->type == TRIE_SWITCH) {
670 const struct trie_switch *sw = NODE(t, off, trie_switch);
671 const char *chars = (const char *)&sw->sw[sw->len];
672 int i;
673
674 for (i = 0; i < sw->len; i++) {
675 if (n < sw->sw[i].subcount) {
676 buf[depth++] = chars[i];
677 off = sw->sw[i].subnode;
678 break;
679 } else
680 n -= sw->sw[i].subcount;
681 }
682 assert(i < sw->len);
683 }
684 }
685 }
686
687 const struct trie_file *trie_getfile(const void *t, unsigned long n)
688 {
689 const struct trie_header *hdr = NODE(t, 0, trie_header);
690 off_t off = hdr->root;
691
692 while (1) {
693 const struct trie_common *node = NODE(t, off, trie_common);
694 if (node->type == TRIE_LEAF) {
695 const struct trie_leaf *leaf = NODE(t, off, trie_leaf);
696 return &leaf->file;
697 } else if (node->type == TRIE_STRING) {
698 const struct trie_string *st = NODE(t, off, trie_string);
699 off = st->subnode;
700 } else if (node->type == TRIE_SWITCH) {
701 const struct trie_switch *sw = NODE(t, off, trie_switch);
702 int i;
703
704 for (i = 0; i < sw->len; i++) {
705 if (n < sw->sw[i].subcount) {
706 off = sw->sw[i].subnode;
707 break;
708 } else
709 n -= sw->sw[i].subcount;
710 }
711 assert(i < sw->len);
712 }
713 }
714 }
715
716 unsigned long trie_count(const void *t)
717 {
718 const struct trie_header *hdr = NODE(t, 0, trie_header);
719 return hdr->count;
720 }
721
722 char trie_pathsep(const void *t)
723 {
724 const struct trie_header *hdr = NODE(t, 0, trie_header);
725 return (char)hdr->pathsep;
726 }
727
728 struct triewalk_switch {
729 const struct trie_switch *sw;
730 int pos, depth, count;
731 };
732 struct triewalk {
733 const void *t;
734 struct triewalk_switch *switches;
735 int nswitches, switchsize;
736 int count;
737 };
738 triewalk *triewalk_new(const void *vt)
739 {
740 triewalk *tw = snew(triewalk);
741
742 tw->t = (const char *)vt;
743 tw->switches = NULL;
744 tw->switchsize = 0;
745 tw->nswitches = -1;
746 tw->count = 0;
747
748 return tw;
749 }
750
751 void triewalk_rebase(triewalk *tw, const void *t)
752 {
753 ptrdiff_t diff = ((const unsigned char *)t - (const unsigned char *)(tw->t));
754 int i;
755
756 tw->t = t;
757
758 for (i = 0; i < tw->nswitches; i++)
759 tw->switches[i].sw = (const struct trie_switch *)
760 ((const unsigned char *)(tw->switches[i].sw) + diff);
761 }
762
763 const struct trie_file *triewalk_next(triewalk *tw, char *buf)
764 {
765 off_t off;
766 int depth;
767
768 if (tw->nswitches < 0) {
769 const struct trie_header *hdr = NODE(tw->t, 0, trie_header);
770 off = hdr->root;
771 depth = 0;
772 tw->nswitches = 0;
773 } else {
774 while (1) {
775 int swpos;
776 const struct trie_switch *sw;
777 const char *chars;
778
779 if (tw->nswitches == 0) {
780 assert(tw->count == NODE(tw->t, 0, trie_header)->count);
781 return NULL; /* run out of trie */
782 }
783
784 swpos = tw->switches[tw->nswitches-1].pos;
785 sw = tw->switches[tw->nswitches-1].sw;
786 chars = (const char *)&sw->sw[sw->len];
787
788 if (swpos < sw->len) {
789 depth = tw->switches[tw->nswitches-1].depth;
790 off = sw->sw[swpos].subnode;
791 if (buf)
792 buf[depth++] = chars[swpos];
793 assert(tw->count == tw->switches[tw->nswitches-1].count);
794 tw->switches[tw->nswitches-1].count += sw->sw[swpos].subcount;
795 tw->switches[tw->nswitches-1].pos++;
796 break;
797 }
798
799 tw->nswitches--;
800 }
801 }
802
803 while (1) {
804 const struct trie_common *node = NODE(tw->t, off, trie_common);
805 if (node->type == TRIE_LEAF) {
806 const struct trie_leaf *lf = NODE(tw->t, off, trie_leaf);
807 if (buf)
808 assert(depth > 0 && buf[depth-1] == '\0');
809 tw->count++;
810 return &lf->file;
811 } else if (node->type == TRIE_STRING) {
812 const struct trie_string *st = NODE(tw->t, off, trie_string);
813
814 if (buf)
815 memcpy(buf + depth, st->string, st->stringlen);
816 depth += st->stringlen;
817 off = st->subnode;
818 } else if (node->type == TRIE_SWITCH) {
819 const struct trie_switch *sw = NODE(tw->t, off, trie_switch);
820 const char *chars = (const char *)&sw->sw[sw->len];
821
822 if (tw->nswitches >= tw->switchsize) {
823 tw->switchsize = tw->nswitches * 3 / 2 + 32;
824 tw->switches = sresize(tw->switches, tw->switchsize,
825 struct triewalk_switch);
826 }
827
828 tw->switches[tw->nswitches].sw = sw;
829 tw->switches[tw->nswitches].pos = 1;
830 tw->switches[tw->nswitches].depth = depth;
831 tw->switches[tw->nswitches].count = tw->count + sw->sw[0].subcount;
832 off = sw->sw[0].subnode;
833 if (buf)
834 buf[depth++] = chars[0];
835 tw->nswitches++;
836 }
837 }
838 }
839 void triewalk_free(triewalk *tw)
840 {
841 sfree(tw->switches);
842 sfree(tw);
843 }
844
845 void trie_set_index_offset(void *t, off_t ptr)
846 {
847 ((struct trie_header *)t)->indexroot = ptr;
848 }
849 off_t trie_get_index_offset(const void *t)
850 {
851 return ((const struct trie_header *)t)->indexroot;
852 }
853
854 void make_successor(char *pathbuf)
855 {
856 int len = strlen(pathbuf);
857 if (len > 0 && pathbuf[len-1] == pathsep)
858 len--;
859 pathbuf[len] = '\001';
860 pathbuf[len+1] = '\0';
861 }