X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/blobdiff_plain/56fa1896af0a1b40aa04ea8c3116ad99da263cb0..cfe942fb3391ae80e816272d7baa45be30e5e7ec:/agedu.c diff --git a/agedu.c b/agedu.c index c20abf9..a3c2db6 100644 --- a/agedu.c +++ b/agedu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -25,11 +26,22 @@ #include "malloc.h" #include "html.h" #include "httpd.h" +#include "fgetline.h" #define PNAME "agedu" #define lenof(x) (sizeof((x))/sizeof(*(x))) +/* + * Path separator. This global variable affects the behaviour of + * various parts of the code when they need to deal with path + * separators. The path separator appropriate to a particular data + * set is encoded in the index file storing that data set; data + * sets generated on Unix will of course have the default '/', but + * foreign data sets are conceivable and must be handled correctly. + */ +char pathsep = '/'; + void fatal(const char *fmt, ...) { va_list ap; @@ -42,7 +54,7 @@ void fatal(const char *fmt, ...) } struct inclusion_exclusion { - int include; + int type; const char *wildcard; int path; }; @@ -53,11 +65,25 @@ struct ctx { ino_t datafile_ino; time_t last_output_update; int progress, progwidth; + int straight_to_dump; struct inclusion_exclusion *inex; int ninex; int crossfs; }; +static void dump_line(const char *pathname, const struct trie_file *tf) +{ + const char *p; + printf("%llu %llu ", tf->size, tf->atime); + for (p = pathname; *p; p++) { + if (*p >= ' ' && *p < 127 && *p != '%') + putchar(*p); + else + printf("%%%02x", (unsigned char)*p); + } + putchar('\n'); +} + static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) { struct ctx *ctx = (struct ctx *)vctx; @@ -78,42 +104,58 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev) return 0; + file.size = (unsigned long long)512 * st->st_blocks; + file.atime = st->st_atime; + /* * Filter based on wildcards. */ include = 1; - filename = strrchr(pathname, '/'); + filename = strrchr(pathname, pathsep); if (!filename) filename = pathname; else filename++; for (i = 0; i < ctx->ninex; i++) { if (fnmatch(ctx->inex[i].wildcard, - ctx->inex[i].path ? pathname : filename, - FNM_PATHNAME) == 0) - include = ctx->inex[i].include; + ctx->inex[i].path ? pathname : filename, 0) == 0) + include = ctx->inex[i].type; + } + if (include == -1) + return 0; /* ignore this entry and any subdirs */ + if (include == 0) { + /* + * Here we are supposed to be filtering an entry out, but + * still recursing into it if it's a directory. However, + * we can't actually leave out any directory whose + * subdirectories we then look at. So we cheat, in that + * case, by setting the size to zero. + */ + if (!S_ISDIR(st->st_mode)) + return 0; /* just ignore */ + else + file.size = 0; } - if (!include) - return 1; /* filter, but don't prune */ - file.blocks = st->st_blocks; - file.atime = st->st_atime; - triebuild_add(ctx->tb, pathname, &file); + if (ctx->straight_to_dump) + dump_line(pathname, &file); + else + triebuild_add(ctx->tb, pathname, &file); - t = time(NULL); - if (t != ctx->last_output_update) { - if (ctx->progress) { + if (ctx->progress) { + t = time(NULL); + if (t != ctx->last_output_update) { fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth, pathname); fflush(stderr); + ctx->last_output_update = t; } - ctx->last_output_update = t; } return 1; } -static void text_query(const void *mappedfile, const char *rootdir, +static void text_query(const void *mappedfile, const char *querydir, time_t t, int depth) { size_t maxpathlen; @@ -129,8 +171,9 @@ static void text_query(const void *mappedfile, const char *rootdir, * (inclusive) and that filename with a ^A on the end * (exclusive). So find the x indices for each. */ - sprintf(pathbuf, "%s\001", rootdir); - xi1 = trie_before(mappedfile, rootdir); + strcpy(pathbuf, querydir); + make_successor(pathbuf); + xi1 = trie_before(mappedfile, querydir); xi2 = trie_before(mappedfile, pathbuf); /* @@ -139,8 +182,11 @@ static void text_query(const void *mappedfile, const char *rootdir, s1 = index_query(mappedfile, xi1, t); s2 = index_query(mappedfile, xi2, t); - /* Display in units of 2 512-byte blocks = 1Kb */ - printf("%-11llu %s\n", (s2 - s1) / 2, rootdir); + if (s1 == s2) + return; /* no space taken up => no display */ + + /* Display in units of 1Kb */ + printf("%-11llu %s\n", (s2 - s1) / 1024, querydir); if (depth > 0) { /* @@ -151,7 +197,7 @@ static void text_query(const void *mappedfile, const char *rootdir, while (xi1 < xi2) { trie_getpath(mappedfile, xi1, pathbuf); text_query(mappedfile, pathbuf, t, depth-1); - strcat(pathbuf, "\001"); + make_successor(pathbuf); xi1 = trie_before(mappedfile, pathbuf); } } @@ -176,7 +222,9 @@ static void text_query(const void *mappedfile, const char *rootdir, * parallel to both of the above giving the logical option id * for each physical short and long option * - define an array indexed by logical option id indicating - * whether the option in question takes a value. + * whether the option in question takes a value + * - define a function which prints out brief online help for all + * the options. * * It's not at all clear to me that this trickery is actually * particularly _efficient_ - it still, after all, requires going @@ -197,11 +245,11 @@ static void text_query(const void *mappedfile, const char *rootdir, * complete language. I said it was largely frivolous :-) * * This approach does have the virtue that it brings together the - * option ids and option spellings into a single combined list and - * defines them all in exactly one place. If I want to add a new - * option, or a new spelling for an option, I only have to modify - * the main OPTIONS macro below and then add code to process the - * new logical id. + * option ids, option spellings and help text into a single + * combined list and defines them all in exactly one place. If I + * want to add a new option, or a new spelling for an option, I + * only have to modify the main OPTHELP macro below and then add + * code to process the new logical id. * * (Though, really, even that isn't ideal, since it still involves * modifying the source file in more than one place. In a @@ -212,29 +260,67 @@ static void text_query(const void *mappedfile, const char *rootdir, * need to specify them manually in another part of the code.) */ -#define OPTIONS(NOVAL, VAL, SHORT, LONG) \ - NOVAL(HELP) SHORT(h) LONG(help) \ - NOVAL(VERSION) SHORT(V) LONG(version) \ - NOVAL(LICENCE) LONG(licence) LONG(license) \ - NOVAL(SCAN) SHORT(s) LONG(scan) \ - NOVAL(DUMP) SHORT(d) LONG(dump) \ - NOVAL(TEXT) SHORT(t) LONG(text) \ - NOVAL(HTML) SHORT(H) LONG(html) \ +#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \ + HELPPFX("usage") HELPLINE("agedu [options] action") \ + HELPPFX("actions") \ + VAL(SCAN) SHORT(s) LONG(scan) \ + HELPARG("directory") HELPOPT("scan and index a directory") \ + NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file on stdout") \ + VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \ + HELPARG("directory") HELPOPT("scan only, generating a dump") \ + NOVAL(LOAD) SHORT(l) LONG(load) \ + HELPOPT("load and index a dump file") \ + VAL(TEXT) SHORT(t) LONG(text) \ + HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \ + VAL(HTML) SHORT(H) LONG(html) \ + HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \ NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \ + HELPOPT("serve HTML reports from a temporary web server") \ + HELPPFX("options") \ + VAL(DATAFILE) SHORT(f) LONG(file) \ + HELPARG("filename") HELPOPT("[all modes] specify index file") \ NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \ + HELPOPT("[--scan] report progress on stderr") \ NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \ + HELPOPT("[--scan] do not report progress") \ NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \ LONG(progress_tty) LONG(scan_progress_tty) \ + HELPOPT("[--scan] report progress if stderr is a tty") \ NOVAL(CROSSFS) LONG(cross_fs) \ + HELPOPT("[--scan] cross filesystem boundaries") \ NOVAL(NOCROSSFS) LONG(no_cross_fs) \ - VAL(DATAFILE) SHORT(f) LONG(file) \ - VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ - VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \ - LONG(server_auth) LONG(web_auth) \ + HELPOPT("[--scan] stick to one filesystem") \ VAL(INCLUDE) LONG(include) \ + HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \ VAL(INCLUDEPATH) LONG(include_path) \ + HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \ VAL(EXCLUDE) LONG(exclude) \ - VAL(EXCLUDEPATH) LONG(exclude_path) + HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \ + VAL(EXCLUDEPATH) LONG(exclude_path) \ + HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \ + VAL(PRUNE) LONG(prune) \ + HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \ + VAL(PRUNEPATH) LONG(prune_path) \ + HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \ + VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ + HELPARG("age") HELPOPT("[--text] include only files older than this") \ + VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \ + HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \ + VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \ + LONG(server_addr) \ + HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \ + VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \ + LONG(server_auth) LONG(web_auth) \ + HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \ + VAL(AUTHFILE) LONG(auth_file) \ + HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \ + VAL(AUTHFD) LONG(auth_fd) \ + HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \ + HELPPFX("also") \ + NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \ + NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \ + NOVAL(LICENCE) LONG(licence) LONG(license) \ + HELPOPT("display (MIT) licence text") \ #define IGNORE(x) #define DEFENUM(x) OPT_ ## x, @@ -251,6 +337,9 @@ static void text_query(const void *mappedfile, const char *rootdir, #define LONGOPTVAL(x) LONGVAL_ ## x, #define LONGTMP(x) SHORTtmp3_ ## x, +#define OPTIONS(NOVAL, VAL, SHORT, LONG) \ + OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE) + enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS }; enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS }; enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS }; @@ -262,6 +351,85 @@ enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) }; static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)}; static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)}; +static void usage(FILE *fp) +{ + char longbuf[80]; + const char *prefix, *shortopt, *longopt, *optarg; + int i, optex; + +#define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1 +#define HELPNOVAL(s) optex = 0; +#define HELPVAL(s) optex = 1; +#define HELPSHORT(s) if (!shortopt) shortopt = "-" #s; +#define HELPLONG(s) if (!longopt) { \ + strcpy(longbuf, "--" #s); longopt = longbuf; \ + for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; } +#define HELPPFX(s) prefix = s; +#define HELPARG(s) optarg = s; +#define HELPLINE(s) assert(optex == -1); \ + fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \ + HELPRESET; +#define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \ + assert(shortopt || longopt); \ + i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \ + shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \ + optarg?" ":"", optarg?optarg:""); \ + fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET; + + HELPRESET; + OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG, + HELPPFX, HELPARG, HELPLINE, HELPOPT); + +#undef HELPRESET +#undef HELPNOVAL +#undef HELPVAL +#undef HELPSHORT +#undef HELPLONG +#undef HELPPFX +#undef HELPARG +#undef HELPLINE +#undef HELPOPT +} + +static time_t parse_age(time_t now, const char *agestr) +{ + time_t t; + struct tm tm; + int nunits; + char unit[2]; + + t = now; + + if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) { + fprintf(stderr, "%s: age specification should be a number followed by" + " one of d,w,m,y\n", PNAME); + exit(1); + } + + if (unit[0] == 'd') { + t -= 86400 * nunits; + } else if (unit[0] == 'w') { + t -= 86400 * 7 * nunits; + } else { + int ym; + + tm = *localtime(&t); + ym = tm.tm_year * 12 + tm.tm_mon; + + if (unit[0] == 'm') + ym -= nunits; + else + ym -= 12 * nunits; + + tm.tm_year = ym / 12; + tm.tm_mon = ym % 12; + + t = mktime(&tm); + } + + return t; +} + int main(int argc, char **argv) { int fd, count; @@ -273,10 +441,16 @@ int main(int argc, char **argv) indexbuild *ib; const struct trie_file *tf; char *filename = "agedu.dat"; - char *rootdir = NULL; + char *scandir = NULL; + char *querydir = NULL; int doing_opts = 1; - enum { USAGE, TEXT, HTML, SCAN, DUMP, HTTPD } mode = USAGE; - char *minage = "0d"; + enum { USAGE, TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD } mode = USAGE; + time_t now = time(NULL); + time_t textcutoff = now, htmlnewest = now, htmloldest = now; + int htmlautoagerange = 1; + const char *httpserveraddr = NULL; + int httpserverport = 0; + const char *httpauthdata = NULL; int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; int progress = 1; struct inclusion_exclusion *inex = NULL; @@ -418,7 +592,7 @@ int main(int argc, char **argv) */ switch (optid) { case OPT_HELP: - printf("FIXME: usage();\n"); + usage(stdout); return 0; case OPT_VERSION: printf("FIXME: version();\n"); @@ -428,15 +602,25 @@ int main(int argc, char **argv) return 0; case OPT_SCAN: mode = SCAN; + scandir = optval; + break; + case OPT_SCANDUMP: + mode = SCANDUMP; + scandir = optval; break; case OPT_DUMP: mode = DUMP; break; + case OPT_LOAD: + mode = LOAD; + break; case OPT_TEXT: + querydir = optval; mode = TEXT; break; case OPT_HTML: mode = HTML; + querydir = optval; break; case OPT_HTTPD: mode = HTTPD; @@ -460,7 +644,34 @@ int main(int argc, char **argv) filename = optval; break; case OPT_MINAGE: - minage = optval; + textcutoff = parse_age(now, optval); + break; + case OPT_AGERANGE: + if (!strcmp(optval, "auto")) { + htmlautoagerange = 1; + } else { + char *q = optval + strcspn(optval, "-:"); + if (*q) + *q++ = '\0'; + htmloldest = parse_age(now, optval); + htmlnewest = *q ? parse_age(now, q) : now; + htmlautoagerange = 0; + } + break; + case OPT_SERVERADDR: + { + char *port; + if (optval[0] == '[' && + (port = strchr(optval, ']')) != NULL) + port++; + else + port = optval; + port += strcspn(port, ":"); + if (port) + *port++ = '\0'; + httpserveraddr = optval; + httpserverport = atoi(port); + } break; case OPT_AUTH: if (!strcmp(optval, "magic")) @@ -471,7 +682,20 @@ int main(int argc, char **argv) auth = HTTPD_AUTH_NONE; else if (!strcmp(optval, "default")) auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; - else { + else if (!strcmp(optval, "help") || + !strcmp(optval, "list")) { + printf("agedu: supported HTTP authentication types" + " are:\n" + " magic use Linux /proc/net/tcp to" + " determine owner of peer socket\n" + " basic HTTP Basic username and" + " password authentication\n" + " default use 'magic' if possible, " + " otherwise fall back to 'basic'\n" + " none unauthenticated HTTP (if" + " the data file is non-confidential)\n"); + return 0; + } else { fprintf(stderr, "%s: unrecognised authentication" " type '%s'\n%*s options are 'magic'," " 'basic', 'none', 'default'\n", @@ -479,62 +703,133 @@ int main(int argc, char **argv) return 1; } break; + case OPT_AUTHFILE: + case OPT_AUTHFD: + { + int fd; + char namebuf[40]; + const char *name; + char *authbuf; + int authlen, authsize; + int ret; + + if (optid == OPT_AUTHFILE) { + fd = open(optval, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, + optval, strerror(errno)); + return 1; + } + name = optval; + } else { + fd = atoi(optval); + name = namebuf; + sprintf(namebuf, "fd %d", fd); + } + + authlen = 0; + authsize = 256; + authbuf = snewn(authsize, char); + while ((ret = read(fd, authbuf+authlen, + authsize-authlen)) > 0) { + authlen += ret; + if ((authsize - authlen) < (authsize / 16)) { + authsize = authlen * 3 / 2 + 4096; + authbuf = sresize(authbuf, authsize, char); + } + } + if (ret < 0) { + fprintf(stderr, "%s: %s: read: %s\n", PNAME, + name, strerror(errno)); + return 1; + } + if (optid == OPT_AUTHFILE) + close(fd); + httpauthdata = authbuf; + } + break; case OPT_INCLUDE: case OPT_INCLUDEPATH: case OPT_EXCLUDE: case OPT_EXCLUDEPATH: + case OPT_PRUNE: + case OPT_PRUNEPATH: if (ninex >= inexsize) { inexsize = ninex * 3 / 2 + 16; inex = sresize(inex, inexsize, struct inclusion_exclusion); } inex[ninex].path = (optid == OPT_INCLUDEPATH || - optid == OPT_EXCLUDEPATH); - inex[ninex].include = (optid == OPT_INCLUDE || - optid == OPT_INCLUDEPATH); + optid == OPT_EXCLUDEPATH || + optid == OPT_PRUNEPATH); + inex[ninex].type = (optid == OPT_INCLUDE ? 1 : + optid == OPT_INCLUDEPATH ? 1 : + optid == OPT_EXCLUDE ? 0 : + optid == OPT_EXCLUDEPATH ? 0 : + optid == OPT_PRUNE ? -1 : + /* optid == OPT_PRUNEPATH ? */ -1); inex[ninex].wildcard = optval; ninex++; break; } } } else { - if (!rootdir) { - rootdir = p; - } else { - fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p); - return 1; - } + fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p); + return 1; } } - if (!rootdir) - rootdir = "."; - if (mode == USAGE) { - printf("FIXME: usage();\n"); - return 0; - } else if (mode == SCAN) { - - fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); - if (fd < 0) { - fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, - strerror(errno)); - return 1; + usage(stderr); + return 1; + } else if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { + + if (mode == LOAD) { + char *buf = fgetline(stdin); + unsigned newpathsep; + buf[strcspn(buf, "\r\n")] = '\0'; + if (1 != sscanf(buf, "agedu dump file. pathsep=%x", + &newpathsep)) { + fprintf(stderr, "%s: header in dump file not recognised\n", + PNAME); + return 1; + } + pathsep = (char)newpathsep; + sfree(buf); } - if (stat(rootdir, &st) < 0) { - fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir, - strerror(errno)); - return 1; + if (mode == SCAN || mode == LOAD) { + /* + * Prepare to write out the index file. + */ + fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); + if (fd < 0) { + fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, + strerror(errno)); + return 1; + } + if (fstat(fd, &st) < 0) { + perror("agedu: fstat"); + return 1; + } + ctx->datafile_dev = st.st_dev; + ctx->datafile_ino = st.st_ino; + ctx->straight_to_dump = 0; + } else { + ctx->datafile_dev = -1; + ctx->datafile_ino = -1; + ctx->straight_to_dump = 1; } - ctx->filesystem_dev = crossfs ? 0 : st.st_dev; - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; + if (mode == SCAN || mode == SCANDUMP) { + if (stat(scandir, &st) < 0) { + fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir, + strerror(errno)); + return 1; + } + ctx->filesystem_dev = crossfs ? 0 : st.st_dev; } - ctx->datafile_dev = st.st_dev; - ctx->datafile_ino = st.st_ino; + ctx->inex = inex; ctx->ninex = ninex; ctx->crossfs = crossfs; @@ -553,99 +848,131 @@ int main(int argc, char **argv) ctx->progwidth = 79; } + if (mode == SCANDUMP) + printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep); + /* * Scan the directory tree, and write out the trie component * of the data file. */ - ctx->tb = triebuild_new(fd); - du(rootdir, gotdata, ctx); - count = triebuild_finish(ctx->tb); - triebuild_free(ctx->tb); - - if (ctx->progress) { - fprintf(stderr, "%-*s\r", ctx->progwidth, ""); - fflush(stderr); - } - - /* - * Work out how much space the cumulative index trees will - * take; enlarge the file, and memory-map it. - */ - if (fstat(fd, &st) < 0) { - perror("agedu: fstat"); - return 1; - } - - printf("Built pathname index, %d entries, %ju bytes\n", count, - (intmax_t)st.st_size); - - totalsize = index_compute_size(st.st_size, count); - - if (lseek(fd, totalsize-1, SEEK_SET) < 0) { - perror("agedu: lseek"); - return 1; + if (mode != SCANDUMP) { + ctx->tb = triebuild_new(fd); } - if (write(fd, "\0", 1) < 1) { - perror("agedu: write"); - return 1; - } - - printf("Upper bound on index file size = %ju bytes\n", - (intmax_t)totalsize); - - mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); - if (!mappedfile) { - perror("agedu: mmap"); - return 1; + if (mode == LOAD) { + char *buf; + int line = 2; + while ((buf = fgetline(stdin)) != NULL) { + struct trie_file tf; + char *p, *q; + + buf[strcspn(buf, "\r\n")] = '\0'; + + p = buf; + q = p; + while (*p && *p != ' ') p++; + if (!*p) { + fprintf(stderr, "%s: dump file line %d: expected at least" + " three fields\n", PNAME, line); + return 1; + } + *p++ = '\0'; + tf.size = strtoull(q, NULL, 10); + q = p; + while (*p && *p != ' ') p++; + if (!*p) { + fprintf(stderr, "%s: dump file line %d: expected at least" + " three fields\n", PNAME, line); + return 1; + } + *p++ = '\0'; + tf.atime = strtoull(q, NULL, 10); + q = buf; + while (*p) { + int c = *p; + if (*p == '%') { + int i; + p++; + c = 0; + for (i = 0; i < 2; i++) { + if (*p >= '0' && *p <= '9') + c += *p - '0'; + else if (*p >= 'A' && *p <= 'F') + c += *p - ('A' - 10); + else if (*p >= 'a' && *p <= 'f') + c += *p - ('a' - 10); + else { + fprintf(stderr, "%s: dump file line %d: unable" + " to parse hex escape\n", PNAME, line); + } + p++; + } + } + *q++ = c; + p++; + } + *q = '\0'; + triebuild_add(ctx->tb, buf, &tf); + sfree(buf); + } + } else { + du(scandir, gotdata, ctx); } + if (mode != SCANDUMP) { + count = triebuild_finish(ctx->tb); + triebuild_free(ctx->tb); - ib = indexbuild_new(mappedfile, st.st_size, count); - tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, NULL)) != NULL) - indexbuild_add(ib, tf); - triewalk_free(tw); - realsize = indexbuild_realsize(ib); - indexbuild_free(ib); - - munmap(mappedfile, totalsize); - ftruncate(fd, realsize); - close(fd); - printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); - } else if (mode == TEXT) { - time_t t; - struct tm tm; - int nunits; - char unit[2]; - size_t pathlen; + if (ctx->progress) { + fprintf(stderr, "%-*s\r", ctx->progwidth, ""); + fflush(stderr); + } - t = time(NULL); + /* + * Work out how much space the cumulative index trees + * will take; enlarge the file, and memory-map it. + */ + if (fstat(fd, &st) < 0) { + perror("agedu: fstat"); + return 1; + } - if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) { - fprintf(stderr, "%s: minimum age should be a number followed by" - " one of d,w,m,y\n", PNAME); - return 1; - } + printf("Built pathname index, %d entries, %ju bytes\n", count, + (intmax_t)st.st_size); - if (unit[0] == 'd') { - t -= 86400 * nunits; - } else if (unit[0] == 'w') { - t -= 86400 * 7 * nunits; - } else { - int ym; + totalsize = index_compute_size(st.st_size, count); - tm = *localtime(&t); - ym = tm.tm_year * 12 + tm.tm_mon; + if (lseek(fd, totalsize-1, SEEK_SET) < 0) { + perror("agedu: lseek"); + return 1; + } + if (write(fd, "\0", 1) < 1) { + perror("agedu: write"); + return 1; + } - if (unit[0] == 'm') - ym -= nunits; - else - ym -= 12 * nunits; + printf("Upper bound on index file size = %ju bytes\n", + (intmax_t)totalsize); - tm.tm_year = ym / 12; - tm.tm_mon = ym % 12; + mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + if (!mappedfile) { + perror("agedu: mmap"); + return 1; + } - t = mktime(&tm); + ib = indexbuild_new(mappedfile, st.st_size, count); + tw = triewalk_new(mappedfile); + while ((tf = triewalk_next(tw, NULL)) != NULL) + indexbuild_add(ib, tf); + triewalk_free(tw); + realsize = indexbuild_realsize(ib); + indexbuild_free(ib); + + munmap(mappedfile, totalsize); + ftruncate(fd, realsize); + close(fd); + printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); } + } else if (mode == TEXT) { + size_t pathlen; fd = open(filename, O_RDONLY); if (fd < 0) { @@ -663,17 +990,19 @@ int main(int argc, char **argv) perror("agedu: mmap"); return 1; } + pathsep = trie_pathsep(mappedfile); /* * Trim trailing slash, just in case. */ - pathlen = strlen(rootdir); - if (pathlen > 0 && rootdir[pathlen-1] == '/') - rootdir[--pathlen] = '\0'; + pathlen = strlen(querydir); + if (pathlen > 0 && querydir[pathlen-1] == pathsep) + querydir[--pathlen] = '\0'; - text_query(mappedfile, rootdir, t, 1); + text_query(mappedfile, querydir, textcutoff, 1); } else if (mode == HTML) { size_t pathlen; + struct html_config cfg; unsigned long xi; char *html; @@ -693,16 +1022,21 @@ int main(int argc, char **argv) perror("agedu: mmap"); return 1; } + pathsep = trie_pathsep(mappedfile); /* * Trim trailing slash, just in case. */ - pathlen = strlen(rootdir); - if (pathlen > 0 && rootdir[pathlen-1] == '/') - rootdir[--pathlen] = '\0'; - - xi = trie_before(mappedfile, rootdir); - html = html_query(mappedfile, xi, NULL); + pathlen = strlen(querydir); + if (pathlen > 0 && querydir[pathlen-1] == pathsep) + querydir[--pathlen] = '\0'; + + xi = trie_before(mappedfile, querydir); + cfg.format = NULL; + cfg.autoage = htmlautoagerange; + cfg.oldest = htmloldest; + cfg.newest = htmlnewest; + html = html_query(mappedfile, xi, &cfg); fputs(html, stdout); } else if (mode == DUMP) { size_t maxpathlen; @@ -724,16 +1058,20 @@ int main(int argc, char **argv) perror("agedu: mmap"); return 1; } + pathsep = trie_pathsep(mappedfile); maxpathlen = trie_maxpathlen(mappedfile); buf = snewn(maxpathlen, char); + printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep); tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, buf)) != NULL) { - printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime); - } + while ((tf = triewalk_next(tw, buf)) != NULL) + dump_line(buf, tf); triewalk_free(tw); } else if (mode == HTTPD) { + struct html_config pcfg; + struct httpd_config dcfg; + fd = open(filename, O_RDONLY); if (fd < 0) { fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, @@ -750,8 +1088,16 @@ int main(int argc, char **argv) perror("agedu: mmap"); return 1; } - - run_httpd(mappedfile, auth); + pathsep = trie_pathsep(mappedfile); + + dcfg.address = httpserveraddr; + dcfg.port = httpserverport; + dcfg.basicauthdata = httpauthdata; + pcfg.format = NULL; + pcfg.autoage = htmlautoagerange; + pcfg.oldest = htmloldest; + pcfg.newest = htmlnewest; + run_httpd(mappedfile, auth, &dcfg, &pcfg); } return 0;