X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/blobdiff_plain/84849cbd12159f7950a5ee35a36f5251a6d8bd3e..1f651677c857d752bc477c937a80a9a3ada5aff4:/agedu.c diff --git a/agedu.c b/agedu.c index 246f002..add92ac 100644 --- a/agedu.c +++ b/agedu.c @@ -2,36 +2,16 @@ * Main program for agedu. */ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include +#include "agedu.h" #include "du.h" #include "trie.h" #include "index.h" -#include "malloc.h" +#include "alloc.h" #include "html.h" #include "httpd.h" #include "fgetline.h" -#define PNAME "agedu" - -#define lenof(x) (sizeof((x))/sizeof(*(x))) - /* * Path separator. This global variable affects the behaviour of * various parts of the code when they need to deal with path @@ -69,6 +49,8 @@ struct ctx { struct inclusion_exclusion *inex; int ninex; int crossfs; + int usemtime; + int fakeatimes; }; static void dump_line(const char *pathname, const struct trie_file *tf) @@ -84,7 +66,7 @@ static void dump_line(const char *pathname, const struct trie_file *tf) putchar('\n'); } -static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) +static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) { struct ctx *ctx = (struct ctx *)vctx; struct trie_file file; @@ -105,7 +87,10 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) return 0; file.size = (unsigned long long)512 * st->st_blocks; - file.atime = st->st_atime; + if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode))) + file.atime = st->st_mtime; + else + file.atime = max(st->st_mtime, st->st_atime); /* * Filter based on wildcards. @@ -155,13 +140,31 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) return 1; } +static void scan_error(void *vctx, const char *fmt, ...) +{ + struct ctx *ctx = (struct ctx *)vctx; + va_list ap; + + if (ctx->progress) { + fprintf(stderr, "%-*s\r", ctx->progwidth, ""); + fflush(stderr); + } + + fprintf(stderr, "%s: ", PNAME); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + ctx->last_output_update--; /* force a progress report next time */ +} + static void text_query(const void *mappedfile, const char *querydir, - time_t t, int depth) + time_t t, int showfiles, int depth) { size_t maxpathlen; char *pathbuf; unsigned long xi1, xi2; - unsigned long long s1, s2; + unsigned long long size; maxpathlen = trie_maxpathlen(mappedfile); pathbuf = snewn(maxpathlen + 1, char); @@ -171,22 +174,39 @@ static void text_query(const void *mappedfile, const char *querydir, * (inclusive) and that filename with a ^A on the end * (exclusive). So find the x indices for each. */ - sprintf(pathbuf, "%s\001", querydir); + strcpy(pathbuf, querydir); + make_successor(pathbuf); xi1 = trie_before(mappedfile, querydir); xi2 = trie_before(mappedfile, pathbuf); + if (!showfiles && xi2 - xi1 == 1) + return; /* file, or empty dir => no display */ + /* * Now do the lookups in the age index. */ - s1 = index_query(mappedfile, xi1, t); - s2 = index_query(mappedfile, xi2, t); + if (xi2 - xi1 == 1) { + /* + * We are querying an individual file, so we should not + * depend on the index entries either side of the node, + * since they almost certainly don't both exist. Instead, + * just look up the file's size and atime in the main trie. + */ + const struct trie_file *f = trie_getfile(mappedfile, xi1); + if (f->atime < t) + size = f->size; + else + size = 0; + } else { + unsigned long long s1, s2; + s1 = index_query(mappedfile, xi1, t); + s2 = index_query(mappedfile, xi2, t); + size = s2 - s1; + } - if (s1 == s2) + if (size == 0) return; /* no space taken up => no display */ - /* Display in units of 1Kb */ - printf("%-11llu %s\n", (s2 - s1) / 1024, querydir); - if (depth > 0) { /* * Now scan for first-level subdirectories and report @@ -195,11 +215,14 @@ static void text_query(const void *mappedfile, const char *querydir, xi1++; while (xi1 < xi2) { trie_getpath(mappedfile, xi1, pathbuf); - text_query(mappedfile, pathbuf, t, depth-1); - strcat(pathbuf, "\001"); + text_query(mappedfile, pathbuf, t, showfiles, depth-1); + make_successor(pathbuf); xi1 = trie_before(mappedfile, pathbuf); } } + + /* Display in units of 1Kb */ + printf("%-11llu %s\n", (size) / 1024, querydir); } /* @@ -257,54 +280,66 @@ static void text_query(const void *mappedfile, const char *querydir, * not bother defining logical identifiers for them at all - those * would be automatically generated, since I wouldn't have any * need to specify them manually in another part of the code.) + * + * One other helpful consequence of the enum-based structure here + * is that it causes a compiler error if I accidentally try to + * define the same option (short or long) twice. */ #define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \ - HELPPFX("usage") HELPLINE("agedu [options] action") \ + HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \ HELPPFX("actions") \ VAL(SCAN) SHORT(s) LONG(scan) \ HELPARG("directory") HELPOPT("scan and index a directory") \ - NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file on stdout") \ - VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \ - HELPARG("directory") HELPOPT("scan only, generating a dump") \ - NOVAL(LOAD) SHORT(l) LONG(load) \ - HELPOPT("load and index a dump file") \ + NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \ + HELPOPT("serve HTML reports from a temporary web server") \ VAL(TEXT) SHORT(t) LONG(text) \ HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \ + NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \ + HELPOPT("remove the index file") \ + NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \ + NOVAL(LOAD) SHORT(L) LONG(load) \ + HELPOPT("load and index a dump file") \ + VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \ + HELPARG("directory") HELPOPT("scan only, generating a dump") \ VAL(HTML) SHORT(H) LONG(html) \ HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \ - NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \ - HELPOPT("serve HTML reports from a temporary web server") \ HELPPFX("options") \ VAL(DATAFILE) SHORT(f) LONG(file) \ - HELPARG("filename") HELPOPT("[all modes] specify index file") \ - NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \ - HELPOPT("[--scan] report progress on stderr") \ - NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \ - HELPOPT("[--scan] do not report progress") \ - NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \ - LONG(progress_tty) LONG(scan_progress_tty) \ - HELPOPT("[--scan] report progress if stderr is a tty") \ + HELPARG("filename") HELPOPT("[most modes] specify index file") \ NOVAL(CROSSFS) LONG(cross_fs) \ HELPOPT("[--scan] cross filesystem boundaries") \ NOVAL(NOCROSSFS) LONG(no_cross_fs) \ HELPOPT("[--scan] stick to one filesystem") \ - VAL(INCLUDE) LONG(include) \ - HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \ - VAL(INCLUDEPATH) LONG(include_path) \ - HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \ - VAL(EXCLUDE) LONG(exclude) \ - HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \ - VAL(EXCLUDEPATH) LONG(exclude_path) \ - HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \ VAL(PRUNE) LONG(prune) \ HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \ VAL(PRUNEPATH) LONG(prune_path) \ HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \ - VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ - HELPARG("age") HELPOPT("[--text] include only files older than this") \ + VAL(EXCLUDE) LONG(exclude) \ + HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \ + VAL(EXCLUDEPATH) LONG(exclude_path) \ + HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \ + VAL(INCLUDE) LONG(include) \ + HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \ + VAL(INCLUDEPATH) LONG(include_path) \ + HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \ + NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \ + HELPOPT("[--scan] report progress on stderr") \ + NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \ + HELPOPT("[--scan] do not report progress") \ + NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \ + LONG(progress_tty) LONG(scan_progress_tty) \ + HELPOPT("[--scan] report progress if stderr is a tty") \ + NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \ + HELPOPT("[--scan,--load] keep real atimes on directories") \ + NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \ + HELPOPT("[--scan,--load] fake atimes on directories") \ + NOVAL(MTIME) LONG(mtime) \ + HELPOPT("[--scan] use mtime instead of atime") \ + NOVAL(SHOWFILES) LONG(files) \ + HELPOPT("[--web,--html,--text] list individual files") \ VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \ - HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \ + HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \ VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \ LONG(server_addr) \ HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \ @@ -315,6 +350,10 @@ static void text_query(const void *mappedfile, const char *querydir, HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \ VAL(AUTHFD) LONG(auth_fd) \ HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \ + VAL(TQDEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \ + HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \ + VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ + HELPARG("age") HELPOPT("[--text] include only files older than this") \ HELPPFX("also") \ NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \ NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \ @@ -438,12 +477,15 @@ int main(int argc, char **argv) void *mappedfile; triewalk *tw; indexbuild *ib; - const struct trie_file *tf; - char *filename = "agedu.dat"; - char *scandir = NULL; - char *querydir = NULL; + const struct trie_file *tf, *prevtf; + char *filename = PNAME ".dat"; int doing_opts = 1; - enum { USAGE, TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD } mode = USAGE; + enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE }; + struct action { + int mode; + char *arg; + } *actions = NULL; + int nactions = 0, actionsize = 0, action; time_t now = time(NULL); time_t textcutoff = now, htmlnewest = now, htmloldest = now; int htmlautoagerange = 1; @@ -455,6 +497,10 @@ int main(int argc, char **argv) struct inclusion_exclusion *inex = NULL; int ninex = 0, inexsize = 0; int crossfs = 0; + int tqdepth = 1; + int fakediratimes = 1; + int mtime = 0; + int showfiles = 0; #ifdef DEBUG_MAD_OPTION_PARSING_MACROS { @@ -594,35 +640,95 @@ int main(int argc, char **argv) usage(stdout); return 0; case OPT_VERSION: - printf("FIXME: version();\n"); +#ifdef PACKAGE_VERSION + printf("%s, revision %s\n", PNAME, PACKAGE_VERSION); +#else + printf("%s: version number not available when not built" + " via automake\n", PNAME); +#endif return 0; case OPT_LICENCE: - printf("FIXME: licence();\n"); + { + extern const char *const licence[]; + int i; + + for (i = 0; licence[i]; i++) + fputs(licence[i], stdout); + + return 0; + } return 0; case OPT_SCAN: - mode = SCAN; - scandir = optval; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = SCAN; + actions[nactions].arg = optval; + nactions++; break; case OPT_SCANDUMP: - mode = SCANDUMP; - scandir = optval; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = SCANDUMP; + actions[nactions].arg = optval; + nactions++; break; case OPT_DUMP: - mode = DUMP; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = DUMP; + actions[nactions].arg = NULL; + nactions++; break; case OPT_LOAD: - mode = LOAD; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = LOAD; + actions[nactions].arg = NULL; + nactions++; break; case OPT_TEXT: - querydir = optval; - mode = TEXT; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = TEXT; + actions[nactions].arg = optval; + nactions++; break; case OPT_HTML: - mode = HTML; - querydir = optval; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = HTML; + actions[nactions].arg = optval; + nactions++; break; case OPT_HTTPD: - mode = HTTPD; + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = HTTPD; + actions[nactions].arg = NULL; + nactions++; + break; + case OPT_REMOVE: + if (nactions >= actionsize) { + actionsize = nactions * 3 / 2 + 16; + actions = sresize(actions, actionsize, struct action); + } + actions[nactions].mode = REMOVE; + actions[nactions].arg = NULL; + nactions++; break; case OPT_PROGRESS: progress = 2; @@ -639,9 +745,24 @@ int main(int argc, char **argv) case OPT_NOCROSSFS: crossfs = 0; break; + case OPT_DIRATIME: + fakediratimes = 0; + break; + case OPT_NODIRATIME: + fakediratimes = 1; + break; + case OPT_SHOWFILES: + showfiles = 1; + break; + case OPT_MTIME: + mtime = 1; + break; case OPT_DATAFILE: filename = optval; break; + case OPT_TQDEPTH: + tqdepth = atoi(optval); + break; case OPT_MINAGE: textcutoff = parse_age(now, optval); break; @@ -683,7 +804,7 @@ int main(int argc, char **argv) auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; else if (!strcmp(optval, "help") || !strcmp(optval, "list")) { - printf("agedu: supported HTTP authentication types" + printf(PNAME ": supported HTTP authentication types" " are:\n" " magic use Linux /proc/net/tcp to" " determine owner of peer socket\n" @@ -778,325 +899,488 @@ int main(int argc, char **argv) } } - if (mode == USAGE) { + if (nactions == 0) { usage(stderr); return 1; - } else if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { - - if (mode == LOAD) { - char *buf = fgetline(stdin); - unsigned newpathsep; - buf[strcspn(buf, "\r\n")] = '\0'; - if (1 != sscanf(buf, "agedu dump file. pathsep=%x", - &newpathsep)) { - fprintf(stderr, "%s: header in dump file not recognised\n", - PNAME); - return 1; + } + + for (action = 0; action < nactions; action++) { + int mode = actions[action].mode; + + if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { + const char *scandir = actions[action].arg; + + if (mode == LOAD) { + char *buf = fgetline(stdin); + unsigned newpathsep; + buf[strcspn(buf, "\r\n")] = '\0'; + if (1 != sscanf(buf, DUMPHDR "%x", + &newpathsep)) { + fprintf(stderr, "%s: header in dump file not recognised\n", + PNAME); + return 1; + } + pathsep = (char)newpathsep; + sfree(buf); } - pathsep = (char)newpathsep; - sfree(buf); - } - if (mode == SCAN || mode == LOAD) { + if (mode == SCAN || mode == LOAD) { + /* + * Prepare to write out the index file. + */ + fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, + S_IRUSR | S_IWUSR); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } + if (fstat(fd, &st) < 0) { + perror(PNAME ": fstat"); + return 1; + } + ctx->datafile_dev = st.st_dev; + ctx->datafile_ino = st.st_ino; + ctx->straight_to_dump = 0; + } else { + ctx->datafile_dev = -1; + ctx->datafile_ino = -1; + ctx->straight_to_dump = 1; + } + + if (mode == SCAN || mode == SCANDUMP) { + if (stat(scandir, &st) < 0) { + fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir, + strerror(errno)); + return 1; + } + ctx->filesystem_dev = crossfs ? 0 : st.st_dev; + } + + ctx->inex = inex; + ctx->ninex = ninex; + ctx->crossfs = crossfs; + ctx->fakeatimes = fakediratimes; + ctx->usemtime = mtime; + + ctx->last_output_update = time(NULL); + + /* progress==1 means report progress only if stderr is a tty */ + if (progress == 1) + progress = isatty(2) ? 2 : 0; + ctx->progress = progress; + { + struct winsize ws; + if (progress && + ioctl(2, TIOCGWINSZ, &ws) == 0 && + ws.ws_col > 0) + ctx->progwidth = ws.ws_col - 1; + else + ctx->progwidth = 79; + } + + if (mode == SCANDUMP) + printf(DUMPHDR "%02x\n", (unsigned char)pathsep); + /* - * Prepare to write out the index file. + * Scan the directory tree, and write out the trie component + * of the data file. */ - fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); - if (fd < 0) { - fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, - strerror(errno)); - return 1; + if (mode != SCANDUMP) { + ctx->tb = triebuild_new(fd); } - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; + if (mode == LOAD) { + char *buf; + int line = 2; + while ((buf = fgetline(stdin)) != NULL) { + struct trie_file tf; + char *p, *q; + + buf[strcspn(buf, "\r\n")] = '\0'; + + p = buf; + q = p; + while (*p && *p != ' ') p++; + if (!*p) { + fprintf(stderr, "%s: dump file line %d: expected at least" + " three fields\n", PNAME, line); + return 1; + } + *p++ = '\0'; + tf.size = strtoull(q, NULL, 10); + q = p; + while (*p && *p != ' ') p++; + if (!*p) { + fprintf(stderr, "%s: dump file line %d: expected at least" + " three fields\n", PNAME, line); + return 1; + } + *p++ = '\0'; + tf.atime = strtoull(q, NULL, 10); + q = buf; + while (*p) { + int c = *p; + if (*p == '%') { + int i; + p++; + c = 0; + for (i = 0; i < 2; i++) { + c *= 16; + if (*p >= '0' && *p <= '9') + c += *p - '0'; + else if (*p >= 'A' && *p <= 'F') + c += *p - ('A' - 10); + else if (*p >= 'a' && *p <= 'f') + c += *p - ('a' - 10); + else { + fprintf(stderr, "%s: dump file line %d: unable" + " to parse hex escape\n", PNAME, line); + } + p++; + } + } else { + p++; + } + *q++ = c; + } + *q = '\0'; + triebuild_add(ctx->tb, buf, &tf); + sfree(buf); + line++; + } + } else { + du(scandir, gotdata, scan_error, ctx); } - ctx->datafile_dev = st.st_dev; - ctx->datafile_ino = st.st_ino; - ctx->straight_to_dump = 0; - } else { - ctx->datafile_dev = -1; - ctx->datafile_ino = -1; - ctx->straight_to_dump = 1; - } + if (mode != SCANDUMP) { + size_t maxpathlen; + size_t delta; + char *buf, *prevbuf; - if (mode == SCAN || mode == SCANDUMP) { - if (stat(scandir, &st) < 0) { - fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir, - strerror(errno)); - return 1; - } - ctx->filesystem_dev = crossfs ? 0 : st.st_dev; - } + count = triebuild_finish(ctx->tb); + triebuild_free(ctx->tb); - ctx->inex = inex; - ctx->ninex = ninex; - ctx->crossfs = crossfs; - - ctx->last_output_update = time(NULL); - - /* progress==1 means report progress only if stderr is a tty */ - if (progress == 1) - progress = isatty(2) ? 2 : 0; - ctx->progress = progress; - { - struct winsize ws; - if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0) - ctx->progwidth = ws.ws_col - 1; - else - ctx->progwidth = 79; - } + if (ctx->progress) { + fprintf(stderr, "%-*s\r", ctx->progwidth, ""); + fflush(stderr); + } - if (mode == SCANDUMP) - printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep); + /* + * Work out how much space the cumulative index trees + * will take; enlarge the file, and memory-map it. + */ + if (fstat(fd, &st) < 0) { + perror(PNAME ": fstat"); + return 1; + } - /* - * Scan the directory tree, and write out the trie component - * of the data file. - */ - if (mode != SCANDUMP) { - ctx->tb = triebuild_new(fd); - } - if (mode == LOAD) { - char *buf; - int line = 2; - while ((buf = fgetline(stdin)) != NULL) { - struct trie_file tf; - char *p, *q; + printf("Built pathname index, %d entries," + " %llu bytes of index\n", count, + (unsigned long long)st.st_size); - buf[strcspn(buf, "\r\n")] = '\0'; + totalsize = index_initial_size(st.st_size, count); + totalsize += totalsize / 10; - p = buf; - q = p; - while (*p && *p != ' ') p++; - if (!*p) { - fprintf(stderr, "%s: dump file line %d: expected at least" - " three fields\n", PNAME, line); + if (lseek(fd, totalsize-1, SEEK_SET) < 0) { + perror(PNAME ": lseek"); return 1; } - *p++ = '\0'; - tf.size = strtoull(q, NULL, 10); - q = p; - while (*p && *p != ' ') p++; - if (!*p) { - fprintf(stderr, "%s: dump file line %d: expected at least" - " three fields\n", PNAME, line); + if (write(fd, "\0", 1) < 1) { + perror(PNAME ": write"); return 1; } - *p++ = '\0'; - tf.atime = strtoull(q, NULL, 10); - q = buf; - while (*p) { - int c = *p; - if (*p == '%') { - int i; - p++; - c = 0; - for (i = 0; i < 2; i++) { - if (*p >= '0' && *p <= '9') - c += *p - '0'; - else if (*p >= 'A' && *p <= 'F') - c += *p - ('A' - 10); - else if (*p >= 'a' && *p <= 'f') - c += *p - ('a' - 10); - else { - fprintf(stderr, "%s: dump file line %d: unable" - " to parse hex escape\n", PNAME, line); - } - p++; + + mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; + } + + if (fakediratimes) { + printf("Faking directory atimes\n"); + trie_fake_dir_atimes(mappedfile); + } + + printf("Building index\n"); + ib = indexbuild_new(mappedfile, st.st_size, count, &delta); + maxpathlen = trie_maxpathlen(mappedfile); + buf = snewn(maxpathlen, char); + prevbuf = snewn(maxpathlen, char); + tw = triewalk_new(mappedfile); + prevbuf[0] = '\0'; + tf = triewalk_next(tw, buf); + assert(tf); + while (1) { + int i; + + if (totalsize - indexbuild_realsize(ib) < delta) { + const void *oldfile = mappedfile; + ptrdiff_t diff; + + /* + * Unmap the file, grow it, and remap it. + */ + munmap(mappedfile, totalsize); + + totalsize += delta; + totalsize += totalsize / 10; + + if (lseek(fd, totalsize-1, SEEK_SET) < 0) { + perror(PNAME ": lseek"); + return 1; + } + if (write(fd, "\0", 1) < 1) { + perror(PNAME ": write"); + return 1; + } + + mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; } + + indexbuild_rebase(ib, mappedfile); + triewalk_rebase(tw, mappedfile); + diff = (const unsigned char *)mappedfile - + (const unsigned char *)oldfile; + if (prevtf) + prevtf = (const struct trie_file *) + (((const unsigned char *)prevtf) + diff); + if (tf) + tf = (const struct trie_file *) + (((const unsigned char *)tf) + diff); } - *q++ = c; - p++; + + /* + * Get the next file from the index. So we are + * currently holding, and have not yet + * indexed, prevtf (with pathname prevbuf) and + * tf (with pathname buf). + */ + prevtf = tf; + memcpy(prevbuf, buf, maxpathlen); + tf = triewalk_next(tw, buf); + + if (!tf) + buf[0] = '\0'; + + /* + * Find the first differing character position + * between our two pathnames. + */ + for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++); + + /* + * If prevbuf was a directory name and buf is + * something inside that directory, then + * trie_before() will be called on prevbuf + * itself. Hence we must drop a tag before it, + * so that the resulting index is usable. + */ + if ((!prevbuf[i] && (buf[i] == pathsep || + (i > 0 && buf[i-1] == pathsep)))) + indexbuild_tag(ib); + + /* + * Add prevtf to the index. + */ + indexbuild_add(ib, prevtf); + + if (!tf) { + /* + * Drop an unconditional final tag, and + * get out of this loop. + */ + indexbuild_tag(ib); + break; + } + + /* + * If prevbuf was a filename inside some + * directory which buf is outside, then + * trie_before() will be called on some + * pathname either equal to buf or epsilon + * less than it. Either way, we're going to + * need to drop a tag after prevtf. + */ + if (strchr(prevbuf+i, pathsep) || !tf) + indexbuild_tag(ib); } - *q = '\0'; - triebuild_add(ctx->tb, buf, &tf); - sfree(buf); + + triewalk_free(tw); + realsize = indexbuild_realsize(ib); + indexbuild_free(ib); + + munmap(mappedfile, totalsize); + ftruncate(fd, realsize); + close(fd); + printf("Final index file size = %llu bytes\n", + (unsigned long long)realsize); } - } else { - du(scandir, gotdata, ctx); - } - if (mode != SCANDUMP) { - count = triebuild_finish(ctx->tb); - triebuild_free(ctx->tb); + } else if (mode == TEXT) { + char *querydir = actions[action].arg; + size_t pathlen; - if (ctx->progress) { - fprintf(stderr, "%-*s\r", ctx->progwidth, ""); - fflush(stderr); + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } + if (fstat(fd, &st) < 0) { + perror(PNAME ": fstat"); + return 1; + } + totalsize = st.st_size; + mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; } + pathsep = trie_pathsep(mappedfile); /* - * Work out how much space the cumulative index trees - * will take; enlarge the file, and memory-map it. + * Trim trailing slash, just in case. */ + pathlen = strlen(querydir); + if (pathlen > 0 && querydir[pathlen-1] == pathsep) + querydir[--pathlen] = '\0'; + + text_query(mappedfile, querydir, textcutoff, showfiles, tqdepth); + + munmap(mappedfile, totalsize); + } else if (mode == HTML) { + char *querydir = actions[action].arg; + size_t pathlen, maxpathlen; + char *pathbuf; + struct html_config cfg; + unsigned long xi; + char *html; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); + perror(PNAME ": fstat"); + return 1; + } + totalsize = st.st_size; + mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); return 1; } + pathsep = trie_pathsep(mappedfile); - printf("Built pathname index, %d entries, %ju bytes\n", count, - (intmax_t)st.st_size); + maxpathlen = trie_maxpathlen(mappedfile); + pathbuf = snewn(maxpathlen, char); - totalsize = index_compute_size(st.st_size, count); + /* + * Trim trailing slash, just in case. + */ + pathlen = strlen(querydir); + if (pathlen > 0 && querydir[pathlen-1] == pathsep) + querydir[--pathlen] = '\0'; + + xi = trie_before(mappedfile, querydir); + if (xi >= trie_count(mappedfile) || + (trie_getpath(mappedfile, xi, pathbuf), + strcmp(pathbuf, querydir))) { + fprintf(stderr, "%s: pathname '%s' does not exist in index\n" + "%*s(check it is spelled exactly as it is in the " + "index, including\n%*sany leading './')\n", + PNAME, querydir, + (int)(1+sizeof(PNAME)), "", + (int)(1+sizeof(PNAME)), ""); + } else if (!index_has_root(mappedfile, xi)) { + fprintf(stderr, "%s: pathname '%s' is" + " a file, not a directory\n", PNAME, querydir); + } else { + cfg.format = NULL; + cfg.autoage = htmlautoagerange; + cfg.oldest = htmloldest; + cfg.newest = htmlnewest; + cfg.showfiles = showfiles; + html = html_query(mappedfile, xi, &cfg); + fputs(html, stdout); + } - if (lseek(fd, totalsize-1, SEEK_SET) < 0) { - perror("agedu: lseek"); + munmap(mappedfile, totalsize); + sfree(pathbuf); + } else if (mode == DUMP) { + size_t maxpathlen; + char *buf; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, + strerror(errno)); return 1; } - if (write(fd, "\0", 1) < 1) { - perror("agedu: write"); + if (fstat(fd, &st) < 0) { + perror(PNAME ": fstat"); return 1; } - - printf("Upper bound on index file size = %ju bytes\n", - (intmax_t)totalsize); - - mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + totalsize = st.st_size; + mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); if (!mappedfile) { - perror("agedu: mmap"); + perror(PNAME ": mmap"); return 1; } + pathsep = trie_pathsep(mappedfile); - ib = indexbuild_new(mappedfile, st.st_size, count); + maxpathlen = trie_maxpathlen(mappedfile); + buf = snewn(maxpathlen, char); + + printf(DUMPHDR "%02x\n", (unsigned char)pathsep); tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, NULL)) != NULL) - indexbuild_add(ib, tf); + while ((tf = triewalk_next(tw, buf)) != NULL) + dump_line(buf, tf); triewalk_free(tw); - realsize = indexbuild_realsize(ib); - indexbuild_free(ib); munmap(mappedfile, totalsize); - ftruncate(fd, realsize); - close(fd); - printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); - } - } else if (mode == TEXT) { - size_t pathlen; + } else if (mode == HTTPD) { + struct html_config pcfg; + struct httpd_config dcfg; - fd = open(filename, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, - strerror(errno)); - return 1; - } - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; - } - totalsize = st.st_size; - mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); - if (!mappedfile) { - perror("agedu: mmap"); - return 1; - } - pathsep = trie_pathsep(mappedfile); - - /* - * Trim trailing slash, just in case. - */ - pathlen = strlen(querydir); - if (pathlen > 0 && querydir[pathlen-1] == pathsep) - querydir[--pathlen] = '\0'; - - text_query(mappedfile, querydir, textcutoff, 1); - } else if (mode == HTML) { - size_t pathlen; - struct html_config cfg; - unsigned long xi; - char *html; - - fd = open(filename, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, - strerror(errno)); - return 1; - } - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; - } - totalsize = st.st_size; - mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); - if (!mappedfile) { - perror("agedu: mmap"); - return 1; - } - pathsep = trie_pathsep(mappedfile); - - /* - * Trim trailing slash, just in case. - */ - pathlen = strlen(querydir); - if (pathlen > 0 && querydir[pathlen-1] == pathsep) - querydir[--pathlen] = '\0'; - - xi = trie_before(mappedfile, querydir); - cfg.format = NULL; - cfg.autoage = htmlautoagerange; - cfg.oldest = htmloldest; - cfg.newest = htmlnewest; - html = html_query(mappedfile, xi, &cfg); - fputs(html, stdout); - } else if (mode == DUMP) { - size_t maxpathlen; - char *buf; - - fd = open(filename, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, - strerror(errno)); - return 1; - } - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; - } - totalsize = st.st_size; - mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); - if (!mappedfile) { - perror("agedu: mmap"); - return 1; - } - pathsep = trie_pathsep(mappedfile); - - maxpathlen = trie_maxpathlen(mappedfile); - buf = snewn(maxpathlen, char); - - printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep); - tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, buf)) != NULL) - dump_line(buf, tf); - triewalk_free(tw); - } else if (mode == HTTPD) { - struct html_config pcfg; - struct httpd_config dcfg; - - fd = open(filename, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, - strerror(errno)); - return 1; - } - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; - } - totalsize = st.st_size; - mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); - if (!mappedfile) { - perror("agedu: mmap"); - return 1; + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } + if (fstat(fd, &st) < 0) { + perror(PNAME ": fstat"); + return 1; + } + totalsize = st.st_size; + mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; + } + pathsep = trie_pathsep(mappedfile); + + dcfg.address = httpserveraddr; + dcfg.port = httpserverport; + dcfg.basicauthdata = httpauthdata; + pcfg.format = NULL; + pcfg.autoage = htmlautoagerange; + pcfg.oldest = htmloldest; + pcfg.newest = htmlnewest; + pcfg.showfiles = showfiles; + run_httpd(mappedfile, auth, &dcfg, &pcfg); + munmap(mappedfile, totalsize); + } else if (mode == REMOVE) { + if (remove(filename) < 0) { + fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } } - pathsep = trie_pathsep(mappedfile); - - dcfg.address = httpserveraddr; - dcfg.port = httpserverport; - dcfg.basicauthdata = httpauthdata; - pcfg.format = NULL; - pcfg.autoage = htmlautoagerange; - pcfg.oldest = htmloldest; - pcfg.newest = htmlnewest; - run_httpd(mappedfile, auth, &dcfg, &pcfg); } return 0;