X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/blobdiff_plain/70322ae3751bc07ac749dffad79a5f3420e67b55..7e25423cd18ab429b13c6e2ea920d9c44c0f263c:/trie.c diff --git a/trie.c b/trie.c index f0b21df..ddadddb 100644 --- a/trie.c +++ b/trie.c @@ -2,16 +2,8 @@ * trie.c: implementation of trie.h. */ -#include -#include -#include -#include -#include - -#include -#include - -#include "malloc.h" +#include "agedu.h" +#include "alloc.h" #include "trie.h" #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) ) @@ -23,9 +15,9 @@ */ static int trieccmp(unsigned char a, unsigned char b) { - a = (a == '\0' ? '\0' : a == '/' ? '\1' : a+1); - b = (b == '\0' ? '\0' : b == '/' ? '\1' : b+1); - return a - b; + a = (a == '\0' ? '\0' : a == pathsep ? '\1' : a+1); + b = (b == '\0' ? '\0' : b == pathsep ? '\1' : b+1); + return (int)a - (int)b; } static int triencmp(const char *a, size_t alen, @@ -108,6 +100,7 @@ struct trie_header { off_t root, indexroot; int count; size_t maxpathlen; + int pathsep; }; /* Union only used for computing alignment */ @@ -154,7 +147,7 @@ static void tb_seek(triebuild *tb, off_t off) { tb->offset = off; if (lseek(tb->fd, off, SEEK_SET) < 0) { - fprintf(stderr, "agedu: lseek: %s\n", strerror(errno)); + fprintf(stderr, PNAME ": lseek: %s\n", strerror(errno)); exit(1); } } @@ -165,7 +158,7 @@ static void tb_write(triebuild *tb, const void *buf, size_t len) while (len > 0) { int ret = write(tb->fd, buf, len); if (ret < 0) { - fprintf(stderr, "agedu: write: %s\n", strerror(errno)); + fprintf(stderr, PNAME ": write: %s\n", strerror(errno)); exit(1); } len -= ret; @@ -198,6 +191,7 @@ triebuild *triebuild_new(int fd) th.root = th.count = 0; th.indexroot = 0; th.maxpathlen = 0; + th.pathsep = (unsigned char)pathsep; tb_seek(tb, 0); tb_write(tb, &th, sizeof(th)); @@ -224,7 +218,8 @@ static off_t triebuild_unwind(triebuild *tb, int targetdepth, int *outcount) while (depth > targetdepth) { int odepth = depth; while (depth > targetdepth && - (depth-1 > tb->switchsize || tb->switches[depth-1].len == 0)) + (depth-1 > tb->switchsize || !tb->switches || + tb->switches[depth-1].len == 0)) depth--; if (odepth > depth) { /* @@ -373,6 +368,7 @@ int triebuild_finish(triebuild *tb) th.root = triebuild_unwind(tb, 0, &th.count); th.indexroot = 0; th.maxpathlen = tb->maxpathlen; + th.pathsep = (unsigned char)pathsep; tb_seek(tb, 0); tb_write(tb, &th, sizeof(th)); @@ -388,6 +384,117 @@ void triebuild_free(triebuild *tb) } /* ---------------------------------------------------------------------- + * Memory-mapped trie modification. + */ + +#define MNODE(t, off, type) \ + ((struct type *)((char *)(t) + (off))) + +static unsigned long long fake_atime_recurse(void *t, struct trie_common *node, + int last_seen_pathsep) +{ + while (node->type == TRIE_STRING) { + struct trie_string *st = (struct trie_string *)node; + last_seen_pathsep = (st->string[st->stringlen-1] == pathsep); + node = MNODE(t, st->subnode, trie_common); + } + + if (node->type == TRIE_LEAF) { + struct trie_leaf *leaf = (struct trie_leaf *)node; + return leaf->file.atime; + } else if (assert(node->type == TRIE_SWITCH), 1) { + struct trie_switch *sw = (struct trie_switch *)node; + const char *chars = (const char *)&sw->sw[sw->len]; + unsigned long long max = 0, subdir, ret; + int i; + int slashindex = -1, bareindex = -1; + + /* + * First, process all the children of this node whose + * switch characters are not \0 or pathsep. We do this in + * reverse order so as to maintain best cache locality + * (tracking generally backwards through the file), though + * it doesn't matter semantically. + * + * For each of these children, we're just recursing into + * it to do any fixups required below it, and amalgamating + * the max atimes we get back. + */ + for (i = sw->len; i-- > 0 ;) { + if (chars[i] == '\0') { + bareindex = i; + } else if (chars[i] == pathsep) { + slashindex = i; + } else { + ret = fake_atime_recurse(t, MNODE(t, sw->sw[i].subnode, + trie_common), 0); + if (max < ret) + max = ret; + } + } + + /* + * Now we have at most two child nodes left to deal with: + * one with a slash (or general pathsep) and one with \0. + * + * If there's a slash node and a bare node, then the slash + * node contains precisely everything inside the directory + * described by the bare node; so we must retrieve the max + * atime for the slash node and use it to fix up the bare + * node. + * + * If there's only a bare node but the pathname leading up + * to this point ends in a slash, then _all_ of the child + * nodes of this node contain stuff inside the directory + * described by the bare node; so we use the whole of the + * maximum value we've computed so far to update the bare + * node. + */ + if (slashindex >= 0) { + ret = fake_atime_recurse(t, MNODE(t, sw->sw[slashindex].subnode, + trie_common), 1); + if (max < ret) + max = ret; + + subdir = ret; + } else if (last_seen_pathsep) { + subdir = max; + } else { + /* Don't update the bare subnode at all. */ + subdir = 0; + } + + if (bareindex >= 0) { + struct trie_leaf *leaf; + + leaf = MNODE(t, sw->sw[bareindex].subnode, trie_leaf); + + if (leaf && leaf->c.type == TRIE_LEAF) { + if (leaf->file.atime < subdir) + leaf->file.atime = subdir; + ret = leaf->file.atime; + } else { + /* Shouldn't really happen, but be cautious anyway */ + ret = fake_atime_recurse(t, &leaf->c, 0); + } + + if (max < ret) + max = ret; + } + + return max; + } +} + +void trie_fake_dir_atimes(void *t) +{ + struct trie_header *hdr = MNODE(t, 0, trie_header); + struct trie_common *node = MNODE(t, hdr->root, trie_common); + + fake_atime_recurse(t, node, 1); +} + +/* ---------------------------------------------------------------------- * Querying functions. */ @@ -494,6 +601,12 @@ unsigned long trie_count(const void *t) return hdr->count; } +char trie_pathsep(const void *t) +{ + const struct trie_header *hdr = NODE(t, 0, trie_header); + return (char)hdr->pathsep; +} + struct triewalk_switch { const struct trie_switch *sw; int pos, depth, count; @@ -516,6 +629,19 @@ triewalk *triewalk_new(const void *vt) return tw; } + +void triewalk_rebase(triewalk *tw, const void *t) +{ + ptrdiff_t diff = ((const unsigned char *)t - (const unsigned char *)(tw->t)); + int i; + + tw->t = t; + + for (i = 0; i < tw->nswitches; i++) + tw->switches[i].sw = (const struct trie_switch *) + ((const unsigned char *)(tw->switches[i].sw) + diff); +} + const struct trie_file *triewalk_next(triewalk *tw, char *buf) { off_t off; @@ -606,3 +732,12 @@ off_t trie_get_index_offset(const void *t) { return ((const struct trie_header *)t)->indexroot; } + +void make_successor(char *pathbuf) +{ + int len = strlen(pathbuf); + if (len > 0 && pathbuf[len-1] == pathsep) + len--; + pathbuf[len] = '\001'; + pathbuf[len+1] = '\0'; +}