X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/blobdiff_plain/56cae6e16d22dfba1aafe91dc3465a684d808124..7e25423cd18ab429b13c6e2ea920d9c44c0f263c:/agedu.c diff --git a/agedu.c b/agedu.c index 533dc4c..99fa1f8 100644 --- a/agedu.c +++ b/agedu.c @@ -90,7 +90,7 @@ static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode))) file.atime = st->st_mtime; else - file.atime = st->st_atime; + file.atime = max(st->st_mtime, st->st_atime); /* * Filter based on wildcards. @@ -140,6 +140,24 @@ static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) return 1; } +static void scan_error(void *vctx, const char *fmt, ...) +{ + struct ctx *ctx = (struct ctx *)vctx; + va_list ap; + + if (ctx->progress) { + fprintf(stderr, "%-*s\r", ctx->progwidth, ""); + fflush(stderr); + } + + fprintf(stderr, "%s: ", PNAME); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + ctx->last_output_update--; /* force a progress report next time */ +} + static void text_query(const void *mappedfile, const char *querydir, time_t t, int depth) { @@ -441,7 +459,7 @@ int main(int argc, char **argv) void *mappedfile; triewalk *tw; indexbuild *ib; - const struct trie_file *tf; + const struct trie_file *tf, *prevtf; char *filename = PNAME ".dat"; int doing_opts = 1; enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE }; @@ -869,6 +887,7 @@ int main(int argc, char **argv) if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { const char *scandir = actions[action].arg; + if (mode == LOAD) { char *buf = fgetline(stdin); unsigned newpathsep; @@ -887,7 +906,8 @@ int main(int argc, char **argv) /* * Prepare to write out the index file. */ - fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); + fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, + S_IRUSR | S_IWUSR); if (fd < 0) { fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, strerror(errno)); @@ -929,7 +949,9 @@ int main(int argc, char **argv) ctx->progress = progress; { struct winsize ws; - if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0) + if (progress && + ioctl(2, TIOCGWINSZ, &ws) == 0 && + ws.ws_col > 0) ctx->progwidth = ws.ws_col - 1; else ctx->progwidth = 79; @@ -1004,9 +1026,13 @@ int main(int argc, char **argv) line++; } } else { - du(scandir, gotdata, ctx); + du(scandir, gotdata, scan_error, ctx); } if (mode != SCANDUMP) { + size_t maxpathlen; + size_t delta; + char *buf, *prevbuf; + count = triebuild_finish(ctx->tb); triebuild_free(ctx->tb); @@ -1024,10 +1050,12 @@ int main(int argc, char **argv) return 1; } - printf("Built pathname index, %d entries, %llu bytes\n", count, + printf("Built pathname index, %d entries," + " %llu bytes of index\n", count, (unsigned long long)st.st_size); - totalsize = index_compute_size(st.st_size, count); + totalsize = index_initial_size(st.st_size, count); + totalsize += totalsize / 10; if (lseek(fd, totalsize-1, SEEK_SET) < 0) { perror(PNAME ": lseek"); @@ -1038,9 +1066,6 @@ int main(int argc, char **argv) return 1; } - printf("Upper bound on index file size = %llu bytes\n", - (unsigned long long)totalsize); - mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); if (!mappedfile) { perror(PNAME ": mmap"); @@ -1053,10 +1078,112 @@ int main(int argc, char **argv) } printf("Building index\n"); - ib = indexbuild_new(mappedfile, st.st_size, count); + ib = indexbuild_new(mappedfile, st.st_size, count, &delta); + maxpathlen = trie_maxpathlen(mappedfile); + buf = snewn(maxpathlen, char); + prevbuf = snewn(maxpathlen, char); tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, NULL)) != NULL) - indexbuild_add(ib, tf); + prevbuf[0] = '\0'; + tf = triewalk_next(tw, buf); + assert(tf); + while (1) { + int i; + + if (totalsize - indexbuild_realsize(ib) < delta) { + const void *oldfile = mappedfile; + ptrdiff_t diff; + + /* + * Unmap the file, grow it, and remap it. + */ + munmap(mappedfile, totalsize); + + totalsize += delta; + totalsize += totalsize / 10; + + if (lseek(fd, totalsize-1, SEEK_SET) < 0) { + perror(PNAME ": lseek"); + return 1; + } + if (write(fd, "\0", 1) < 1) { + perror(PNAME ": write"); + return 1; + } + + mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; + } + + indexbuild_rebase(ib, mappedfile); + triewalk_rebase(tw, mappedfile); + diff = (const unsigned char *)mappedfile - + (const unsigned char *)oldfile; + if (prevtf) + prevtf = (const struct trie_file *) + (((const unsigned char *)prevtf) + diff); + if (tf) + tf = (const struct trie_file *) + (((const unsigned char *)tf) + diff); + } + + /* + * Get the next file from the index. So we are + * currently holding, and have not yet + * indexed, prevtf (with pathname prevbuf) and + * tf (with pathname buf). + */ + prevtf = tf; + memcpy(prevbuf, buf, maxpathlen); + tf = triewalk_next(tw, buf); + + if (!tf) + buf[0] = '\0'; + + /* + * Find the first differing character position + * between our two pathnames. + */ + for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++); + + /* + * If prevbuf was a directory name and buf is + * something inside that directory, then + * trie_before() will be called on prevbuf + * itself. Hence we must drop a tag before it, + * so that the resulting index is usable. + */ + if ((!prevbuf[i] && (buf[i] == pathsep || + (i > 0 && buf[i-1] == pathsep)))) + indexbuild_tag(ib); + + /* + * Add prevtf to the index. + */ + indexbuild_add(ib, prevtf); + + if (!tf) { + /* + * Drop an unconditional final tag, and + * get out of this loop. + */ + indexbuild_tag(ib); + break; + } + + /* + * If prevbuf was a filename inside some + * directory which buf is outside, then + * trie_before() will be called on some + * pathname either equal to buf or epsilon + * less than it. Either way, we're going to + * need to drop a tag after prevtf. + */ + if (strchr(prevbuf+i, pathsep) || !tf) + indexbuild_tag(ib); + } + triewalk_free(tw); realsize = indexbuild_realsize(ib); indexbuild_free(ib); @@ -1064,7 +1191,7 @@ int main(int argc, char **argv) munmap(mappedfile, totalsize); ftruncate(fd, realsize); close(fd); - printf("Actual index file size = %llu bytes\n", + printf("Final index file size = %llu bytes\n", (unsigned long long)realsize); } } else if (mode == TEXT) {