X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/blobdiff_plain/0313b788487ecaadbe31429ea955fc920d8f6557..0089cdbb132d9486413c0c1a5fa2fba7571e8dae:/agedu.c diff --git a/agedu.c b/agedu.c index 68a9799..9ff6bb8 100644 --- a/agedu.c +++ b/agedu.c @@ -2,29 +2,12 @@ * Main program for agedu. */ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - #include "agedu.h" + #include "du.h" #include "trie.h" #include "index.h" -#include "malloc.h" +#include "alloc.h" #include "html.h" #include "httpd.h" #include "fgetline.h" @@ -66,6 +49,8 @@ struct ctx { struct inclusion_exclusion *inex; int ninex; int crossfs; + int usemtime; + int fakeatimes; }; static void dump_line(const char *pathname, const struct trie_file *tf) @@ -81,7 +66,7 @@ static void dump_line(const char *pathname, const struct trie_file *tf) putchar('\n'); } -static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) +static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) { struct ctx *ctx = (struct ctx *)vctx; struct trie_file file; @@ -102,7 +87,10 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) return 0; file.size = (unsigned long long)512 * st->st_blocks; - file.atime = st->st_atime; + if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode))) + file.atime = st->st_mtime; + else + file.atime = st->st_atime; /* * Filter based on wildcards. @@ -152,6 +140,24 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) return 1; } +static void scan_error(void *vctx, const char *fmt, ...) +{ + struct ctx *ctx = (struct ctx *)vctx; + va_list ap; + + if (ctx->progress) { + fprintf(stderr, "%-*s\r", ctx->progwidth, ""); + fflush(stderr); + } + + fprintf(stderr, "%s: ", PNAME); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + ctx->last_output_update--; /* force a progress report next time */ +} + static void text_query(const void *mappedfile, const char *querydir, time_t t, int depth) { @@ -258,6 +264,10 @@ static void text_query(const void *mappedfile, const char *querydir, * not bother defining logical identifiers for them at all - those * would be automatically generated, since I wouldn't have any * need to specify them manually in another part of the code.) + * + * One other helpful consequence of the enum-based structure here + * is that it causes a compiler error if I accidentally try to + * define the same option (short or long) twice. */ #define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \ @@ -265,49 +275,53 @@ static void text_query(const void *mappedfile, const char *querydir, HELPPFX("actions") \ VAL(SCAN) SHORT(s) LONG(scan) \ HELPARG("directory") HELPOPT("scan and index a directory") \ - NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file on stdout") \ - VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \ - HELPARG("directory") HELPOPT("scan only, generating a dump") \ - NOVAL(LOAD) SHORT(l) LONG(load) \ - HELPOPT("load and index a dump file") \ + NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \ + HELPOPT("serve HTML reports from a temporary web server") \ VAL(TEXT) SHORT(t) LONG(text) \ HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \ + NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \ + HELPOPT("remove the index file") \ + NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \ + NOVAL(LOAD) SHORT(L) LONG(load) \ + HELPOPT("load and index a dump file") \ + VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \ + HELPARG("directory") HELPOPT("scan only, generating a dump") \ VAL(HTML) SHORT(H) LONG(html) \ HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \ - NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \ - HELPOPT("serve HTML reports from a temporary web server") \ HELPPFX("options") \ VAL(DATAFILE) SHORT(f) LONG(file) \ - HELPARG("filename") HELPOPT("[all modes] specify index file") \ - NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \ - HELPOPT("[--scan] report progress on stderr") \ - NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \ - HELPOPT("[--scan] do not report progress") \ - NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \ - LONG(progress_tty) LONG(scan_progress_tty) \ - HELPOPT("[--scan] report progress if stderr is a tty") \ + HELPARG("filename") HELPOPT("[most modes] specify index file") \ NOVAL(CROSSFS) LONG(cross_fs) \ HELPOPT("[--scan] cross filesystem boundaries") \ NOVAL(NOCROSSFS) LONG(no_cross_fs) \ HELPOPT("[--scan] stick to one filesystem") \ - VAL(INCLUDE) LONG(include) \ - HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \ - VAL(INCLUDEPATH) LONG(include_path) \ - HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \ - VAL(EXCLUDE) LONG(exclude) \ - HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \ - VAL(EXCLUDEPATH) LONG(exclude_path) \ - HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \ VAL(PRUNE) LONG(prune) \ HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \ VAL(PRUNEPATH) LONG(prune_path) \ HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \ - VAL(TQDEPTH) LONG(depth) LONG(max_depth) LONG(maximum_depth) \ - HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \ - VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ - HELPARG("age") HELPOPT("[--text] include only files older than this") \ + VAL(EXCLUDE) LONG(exclude) \ + HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \ + VAL(EXCLUDEPATH) LONG(exclude_path) \ + HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \ + VAL(INCLUDE) LONG(include) \ + HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \ + VAL(INCLUDEPATH) LONG(include_path) \ + HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \ + NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \ + HELPOPT("[--scan] report progress on stderr") \ + NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \ + HELPOPT("[--scan] do not report progress") \ + NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \ + LONG(progress_tty) LONG(scan_progress_tty) \ + HELPOPT("[--scan] report progress if stderr is a tty") \ + NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \ + HELPOPT("[--scan,--load] keep real atimes on directories") \ + NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \ + HELPOPT("[--scan,--load] fake atimes on directories") \ + NOVAL(MTIME) LONG(mtime) \ + HELPOPT("[--scan] use mtime instead of atime") \ VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \ - HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \ + HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \ VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \ LONG(server_addr) \ HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \ @@ -318,6 +332,10 @@ static void text_query(const void *mappedfile, const char *querydir, HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \ VAL(AUTHFD) LONG(auth_fd) \ HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \ + VAL(TQDEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \ + HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \ + VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ + HELPARG("age") HELPOPT("[--text] include only files older than this") \ HELPPFX("also") \ NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \ NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \ @@ -441,10 +459,10 @@ int main(int argc, char **argv) void *mappedfile; triewalk *tw; indexbuild *ib; - const struct trie_file *tf; + const struct trie_file *tf, *prevtf; char *filename = PNAME ".dat"; int doing_opts = 1; - enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD }; + enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE }; struct action { int mode; char *arg; @@ -462,6 +480,8 @@ int main(int argc, char **argv) int ninex = 0, inexsize = 0; int crossfs = 0; int tqdepth = 1; + int fakediratimes = 1; + int mtime = 0; #ifdef DEBUG_MAD_OPTION_PARSING_MACROS { @@ -601,7 +621,12 @@ int main(int argc, char **argv) usage(stdout); return 0; case OPT_VERSION: - printf("FIXME: version();\n"); +#ifdef PACKAGE_VERSION + printf("%s, revision %s\n", PNAME, PACKAGE_VERSION); +#else + printf("%s: version number not available when not built" + " via automake\n", PNAME); +#endif return 0; case OPT_LICENCE: { @@ -677,6 +702,15 @@ int main(int argc, char **argv) actions[nactions].arg = NULL; nactions++; break; + case OPT_REMOVE: + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = REMOVE; + actions[nactions].arg = NULL; + nactions++; + break; case OPT_PROGRESS: progress = 2; break; @@ -692,6 +726,15 @@ int main(int argc, char **argv) case OPT_NOCROSSFS: crossfs = 0; break; + case OPT_DIRATIME: + fakediratimes = 0; + break; + case OPT_NODIRATIME: + fakediratimes = 1; + break; + case OPT_MTIME: + mtime = 1; + break; case OPT_DATAFILE: filename = optval; break; @@ -844,6 +887,7 @@ int main(int argc, char **argv) if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { const char *scandir = actions[action].arg; + if (mode == LOAD) { char *buf = fgetline(stdin); unsigned newpathsep; @@ -862,7 +906,8 @@ int main(int argc, char **argv) /* * Prepare to write out the index file. */ - fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); + fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, + S_IRUSR | S_IWUSR); if (fd < 0) { fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, strerror(errno)); @@ -893,6 +938,8 @@ int main(int argc, char **argv) ctx->inex = inex; ctx->ninex = ninex; ctx->crossfs = crossfs; + ctx->fakeatimes = fakediratimes; + ctx->usemtime = mtime; ctx->last_output_update = time(NULL); @@ -902,7 +949,9 @@ int main(int argc, char **argv) ctx->progress = progress; { struct winsize ws; - if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0) + if (progress && + ioctl(2, TIOCGWINSZ, &ws) == 0 && + ws.ws_col > 0) ctx->progwidth = ws.ws_col - 1; else ctx->progwidth = 79; @@ -954,6 +1003,7 @@ int main(int argc, char **argv) p++; c = 0; for (i = 0; i < 2; i++) { + c *= 16; if (*p >= '0' && *p <= '9') c += *p - '0'; else if (*p >= 'A' && *p <= 'F') @@ -973,11 +1023,16 @@ int main(int argc, char **argv) *q = '\0'; triebuild_add(ctx->tb, buf, &tf); sfree(buf); + line++; } } else { - du(scandir, gotdata, ctx); + du(scandir, gotdata, scan_error, ctx); } if (mode != SCANDUMP) { + size_t maxpathlen; + size_t delta; + char *buf, *prevbuf; + count = triebuild_finish(ctx->tb); triebuild_free(ctx->tb); @@ -995,10 +1050,12 @@ int main(int argc, char **argv) return 1; } - printf("Built pathname index, %d entries, %ju bytes\n", count, - (intmax_t)st.st_size); + printf("Built pathname index, %d entries," + " %llu bytes of index\n", count, + (unsigned long long)st.st_size); - totalsize = index_compute_size(st.st_size, count); + totalsize = index_initial_size(st.st_size, count); + totalsize += totalsize / 10; if (lseek(fd, totalsize-1, SEEK_SET) < 0) { perror(PNAME ": lseek"); @@ -1009,19 +1066,124 @@ int main(int argc, char **argv) return 1; } - printf("Upper bound on index file size = %ju bytes\n", - (intmax_t)totalsize); - mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); if (!mappedfile) { perror(PNAME ": mmap"); return 1; } - ib = indexbuild_new(mappedfile, st.st_size, count); + if (fakediratimes) { + printf("Faking directory atimes\n"); + trie_fake_dir_atimes(mappedfile); + } + + printf("Building index\n"); + ib = indexbuild_new(mappedfile, st.st_size, count, &delta); + maxpathlen = trie_maxpathlen(mappedfile); + buf = snewn(maxpathlen, char); + prevbuf = snewn(maxpathlen, char); tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, NULL)) != NULL) - indexbuild_add(ib, tf); + prevbuf[0] = '\0'; + tf = triewalk_next(tw, buf); + assert(tf); + while (1) { + int i; + + if (totalsize - indexbuild_realsize(ib) < delta) { + const void *oldfile = mappedfile; + ptrdiff_t diff; + + /* + * Unmap the file, grow it, and remap it. + */ + munmap(mappedfile, totalsize); + + totalsize += delta; + totalsize += totalsize / 10; + + if (lseek(fd, totalsize-1, SEEK_SET) < 0) { + perror(PNAME ": lseek"); + return 1; + } + if (write(fd, "\0", 1) < 1) { + perror(PNAME ": write"); + return 1; + } + + mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; + } + + indexbuild_rebase(ib, mappedfile); + triewalk_rebase(tw, mappedfile); + diff = (const unsigned char *)mappedfile - + (const unsigned char *)oldfile; + if (prevtf) + prevtf = (const struct trie_file *) + (((const unsigned char *)prevtf) + diff); + if (tf) + tf = (const struct trie_file *) + (((const unsigned char *)tf) + diff); + } + + /* + * Get the next file from the index. So we are + * currently holding, and have not yet + * indexed, prevtf (with pathname prevbuf) and + * tf (with pathname buf). + */ + prevtf = tf; + memcpy(prevbuf, buf, maxpathlen); + tf = triewalk_next(tw, buf); + + if (!tf) + buf[0] = '\0'; + + /* + * Find the first differing character position + * between our two pathnames. + */ + for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++); + + /* + * If prevbuf was a directory name and buf is + * something inside that directory, then + * trie_before() will be called on prevbuf + * itself. Hence we must drop a tag before it, + * so that the resulting index is usable. + */ + if ((!prevbuf[i] && (buf[i] == pathsep || + (i > 0 && buf[i-1] == pathsep)))) + indexbuild_tag(ib); + + /* + * Add prevtf to the index. + */ + indexbuild_add(ib, prevtf); + + if (!tf) { + /* + * Drop an unconditional final tag, and + * get out of this loop. + */ + indexbuild_tag(ib); + break; + } + + /* + * If prevbuf was a filename inside some + * directory which buf is outside, then + * trie_before() will be called on some + * pathname either equal to buf or epsilon + * less than it. Either way, we're going to + * need to drop a tag after prevtf. + */ + if (strchr(prevbuf+i, pathsep) || !tf) + indexbuild_tag(ib); + } + triewalk_free(tw); realsize = indexbuild_realsize(ib); indexbuild_free(ib); @@ -1029,7 +1191,8 @@ int main(int argc, char **argv) munmap(mappedfile, totalsize); ftruncate(fd, realsize); close(fd); - printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); + printf("Final index file size = %llu bytes\n", + (unsigned long long)realsize); } } else if (mode == TEXT) { char *querydir = actions[action].arg; @@ -1061,6 +1224,8 @@ int main(int argc, char **argv) querydir[--pathlen] = '\0'; text_query(mappedfile, querydir, textcutoff, tqdepth); + + munmap(mappedfile, totalsize); } else if (mode == HTML) { char *querydir = actions[action].arg; size_t pathlen; @@ -1100,6 +1265,8 @@ int main(int argc, char **argv) cfg.newest = htmlnewest; html = html_query(mappedfile, xi, &cfg); fputs(html, stdout); + + munmap(mappedfile, totalsize); } else if (mode == DUMP) { size_t maxpathlen; char *buf; @@ -1130,6 +1297,8 @@ int main(int argc, char **argv) while ((tf = triewalk_next(tw, buf)) != NULL) dump_line(buf, tf); triewalk_free(tw); + + munmap(mappedfile, totalsize); } else if (mode == HTTPD) { struct html_config pcfg; struct httpd_config dcfg; @@ -1160,6 +1329,13 @@ int main(int argc, char **argv) pcfg.oldest = htmloldest; pcfg.newest = htmlnewest; run_httpd(mappedfile, auth, &dcfg, &pcfg); + munmap(mappedfile, totalsize); + } else if (mode == REMOVE) { + if (remove(filename) < 0) { + fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } } }