Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / agedu.c
diff --git a/agedu.c b/agedu.c
index d7d9126..1847e2c 100644 (file)
--- a/agedu.c
+++ b/agedu.c
@@ -2,36 +2,16 @@
  * Main program for agedu.
  */
 
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <time.h>
-#include <assert.h>
-
-#include <unistd.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <termios.h>
-#include <sys/ioctl.h>
-#include <fnmatch.h>
+#include "agedu.h"
 
 #include "du.h"
 #include "trie.h"
 #include "index.h"
-#include "malloc.h"
+#include "alloc.h"
 #include "html.h"
 #include "httpd.h"
 #include "fgetline.h"
 
-#define PNAME "agedu"
-
-#define lenof(x) (sizeof((x))/sizeof(*(x)))
-
 /*
  * Path separator. This global variable affects the behaviour of
  * various parts of the code when they need to deal with path
@@ -69,22 +49,28 @@ struct ctx {
     struct inclusion_exclusion *inex;
     int ninex;
     int crossfs;
+    int usemtime;
+    int fakeatimes;
 };
 
 static void dump_line(const char *pathname, const struct trie_file *tf)
 {
     const char *p;
-    printf("%llu %llu ", tf->size, tf->atime);
+    if (printf("%llu %llu ", tf->size, tf->atime) < 0) goto error;  /* writes one dump record: "<size> <atime> <escaped pathname>\n" */
     for (p = pathname; *p; p++) {
-       if (*p >= ' ' && *p < 127 && *p != '%')
-           putchar(*p);
-       else
-           printf("%%%02x", (unsigned char)*p);
+       if (*p >= ' ' && *p < 127 && *p != '%') {  /* printable ASCII other than '%': emitted verbatim */
+           if (putchar(*p) == EOF) goto error;
+       } else {  /* anything else, including '%' itself, is written as %xx (two hex digits of the byte) */
+           if (printf("%%%02x", (unsigned char)*p) < 0) goto error;
+       }
     }
-    putchar('\n');
+    if (putchar('\n') == EOF) goto error;  /* newline terminates the record */
+    return;
+    error:  /* reached only via the gotos above, i.e. when a write to stdout reported failure */
+    fatal("standard output: %s", strerror(errno));  /* NOTE(review): presumably fatal() does not return - confirm */
 }
 
-static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
+static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
 {
     struct ctx *ctx = (struct ctx *)vctx;
     struct trie_file file;
@@ -105,7 +91,10 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
        return 0;
 
     file.size = (unsigned long long)512 * st->st_blocks;
-    file.atime = st->st_atime;
+    if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode)))
+       file.atime = st->st_mtime;
+    else
+       file.atime = max(st->st_mtime, st->st_atime);
 
     /*
      * Filter based on wildcards.
@@ -155,13 +144,31 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
     return 1;
 }
 
+static void scan_error(void *vctx, const char *fmt, ...)
+{
+    struct ctx *ctx = (struct ctx *)vctx;
+    va_list ap;
+    /* Prints a printf-style message (fmt plus varargs) to stderr as "PNAME: message"; no newline is added here. */
+    if (ctx->progress) {
+       fprintf(stderr, "%-*s\r", ctx->progwidth, "");  /* progwidth spaces then CR: blanks the current line */
+       fflush(stderr);
+    }
+    /* Program-name prefix first, then the caller's formatted text. */
+    fprintf(stderr, "%s: ", PNAME);
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    ctx->last_output_update--;        /* force a progress report next time */
+}
+
 static void text_query(const void *mappedfile, const char *querydir,
-                      time_t t, int depth)
+                      time_t t, int showfiles, int depth, FILE *fp)
 {
     size_t maxpathlen;
     char *pathbuf;
     unsigned long xi1, xi2;
-    unsigned long long s1, s2;
+    unsigned long long size;
 
     maxpathlen = trie_maxpathlen(mappedfile);
     pathbuf = snewn(maxpathlen + 1, char);
@@ -176,31 +183,51 @@ static void text_query(const void *mappedfile, const char *querydir,
     xi1 = trie_before(mappedfile, querydir);
     xi2 = trie_before(mappedfile, pathbuf);
 
+    if (!showfiles && xi2 - xi1 == 1)
+       return;                        /* file, or empty dir => no display */
+
     /*
      * Now do the lookups in the age index.
      */
-    s1 = index_query(mappedfile, xi1, t);
-    s2 = index_query(mappedfile, xi2, t);
+    if (xi2 - xi1 == 1) {
+       /*
+        * We are querying an individual file, so we should not
+        * depend on the index entries either side of the node,
+        * since they almost certainly don't both exist. Instead,
+        * just look up the file's size and atime in the main trie.
+        */
+       const struct trie_file *f = trie_getfile(mappedfile, xi1);
+       if (f->atime < t)
+           size = f->size;
+       else
+           size = 0;
+    } else {
+       unsigned long long s1, s2;
+       s1 = index_query(mappedfile, xi1, t);
+       s2 = index_query(mappedfile, xi2, t);
+       size = s2 - s1;
+    }
 
-    if (s1 == s2)
+    if (size == 0)
        return;                        /* no space taken up => no display */
 
-    /* Display in units of 1Kb */
-    printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
-
-    if (depth > 0) {
+    if (depth != 0) {
        /*
         * Now scan for first-level subdirectories and report
         * those too.
         */
+       int newdepth = (depth > 0 ? depth - 1 : depth);
        xi1++;
        while (xi1 < xi2) {
            trie_getpath(mappedfile, xi1, pathbuf);
-           text_query(mappedfile, pathbuf, t, depth-1);
+           text_query(mappedfile, pathbuf, t, showfiles, newdepth, fp);
            make_successor(pathbuf);
            xi1 = trie_before(mappedfile, pathbuf);
        }
     }
+
+    /* Display in units of 1Kb */
+    fprintf(fp, "%-11llu %s\n", (size) / 1024, querydir);
 }
 
 /*
@@ -258,54 +285,72 @@ static void text_query(const void *mappedfile, const char *querydir,
  * not bother defining logical identifiers for them at all - those
  * would be automatically generated, since I wouldn't have any
  * need to specify them manually in another part of the code.)
+ *
+ * One other helpful consequence of the enum-based structure here
+ * is that it causes a compiler error if I accidentally try to
+ * define the same option (short or long) twice.
  */
 
 #define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
-    HELPPFX("usage") HELPLINE("agedu [options] action [action...]") \
+    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
     HELPPFX("actions") \
     VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
-    NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file on stdout") \
-    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
-       HELPARG("directory") HELPOPT("scan only, generating a dump") \
-    NOVAL(LOAD) SHORT(l) LONG(load) \
-       HELPOPT("load and index a dump file") \
+    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
+        HELPOPT("serve HTML reports from a temporary web server") \
     VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
+    NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \
+        HELPOPT("remove the index file") \
+    NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \
+    NOVAL(LOAD) SHORT(L) LONG(load) \
+       HELPOPT("load and index a dump file") \
+    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
+       HELPARG("directory") HELPOPT("scan only, generating a dump") \
     VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
-    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
-        HELPOPT("serve HTML reports from a temporary web server") \
+    NOVAL(CGI) LONG(cgi) \
+        HELPOPT("do the right thing when run from a CGI script") \
     HELPPFX("options") \
     VAL(DATAFILE) SHORT(f) LONG(file) \
-        HELPARG("filename") HELPOPT("[all modes] specify index file") \
-    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
-        HELPOPT("[--scan] report progress on stderr") \
-    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
-        HELPOPT("[--scan] do not report progress") \
-    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
-                      LONG(progress_tty) LONG(scan_progress_tty) \
-        HELPOPT("[--scan] report progress if stderr is a tty") \
+        HELPARG("filename") HELPOPT("[most modes] specify index file") \
     NOVAL(CROSSFS) LONG(cross_fs) \
         HELPOPT("[--scan] cross filesystem boundaries") \
     NOVAL(NOCROSSFS) LONG(no_cross_fs) \
         HELPOPT("[--scan] stick to one filesystem") \
-    VAL(INCLUDE) LONG(include) \
-        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
-    VAL(INCLUDEPATH) LONG(include_path) \
-        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
-    VAL(EXCLUDE) LONG(exclude) \
-        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
-    VAL(EXCLUDEPATH) LONG(exclude_path) \
-        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
     VAL(PRUNE) LONG(prune) \
         HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
     VAL(PRUNEPATH) LONG(prune_path) \
         HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
-    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
-        HELPARG("age") HELPOPT("[--text] include only files older than this") \
+    VAL(EXCLUDE) LONG(exclude) \
+        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
+    VAL(EXCLUDEPATH) LONG(exclude_path) \
+        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
+    VAL(INCLUDE) LONG(include) \
+        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
+    VAL(INCLUDEPATH) LONG(include_path) \
+        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
+    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
+        HELPOPT("[--scan] report progress on stderr") \
+    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
+        HELPOPT("[--scan] do not report progress") \
+    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
+                      LONG(progress_tty) LONG(scan_progress_tty) \
+        HELPOPT("[--scan] report progress if stderr is a tty") \
+    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
+        HELPOPT("[--scan,--load] keep real atimes on directories") \
+    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
+        HELPOPT("[--scan,--load] fake atimes on directories") \
+    NOVAL(NOEOF) LONG(no_eof) LONG(noeof) \
+        HELPOPT("[--web] do not close web server on EOF") \
+    NOVAL(MTIME) LONG(mtime) \
+        HELPOPT("[--scan] use mtime instead of atime") \
+    NOVAL(SHOWFILES) LONG(files) \
+        HELPOPT("[--web,--html,--text] list individual files") \
     VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
-        HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \
+        HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \
+    VAL(OUTFILE) SHORT(o) LONG(output) \
+       HELPARG("filename") HELPOPT("[--html] specify output file or directory name") \
     VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
               LONG(server_addr) \
         HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
@@ -316,6 +361,12 @@ static void text_query(const void *mappedfile, const char *querydir,
         HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
     VAL(AUTHFD) LONG(auth_fd) \
         HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
+    VAL(HTMLTITLE) LONG(title) \
+        HELPARG("title") HELPOPT("[--web,--html] title prefix for web pages") \
+    VAL(DEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
+        HELPARG("levels") HELPOPT("[--text,--html] recurse to this many levels") \
+    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
+        HELPARG("age") HELPOPT("[--text] include only files older than this") \
     HELPPFX("also") \
     NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
     NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
@@ -346,8 +397,8 @@ enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
 static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
 static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
 static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
-enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
-enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
+enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) UNUSEDENUMVAL1 };
+enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) UNUSEDENUMVAL2 };
 static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
 static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
 
@@ -439,10 +490,10 @@ int main(int argc, char **argv)
     void *mappedfile;
     triewalk *tw;
     indexbuild *ib;
-    const struct trie_file *tf;
-    char *filename = "agedu.dat";
+    const struct trie_file *tf, *prevtf;
+    char *filename = PNAME ".dat";
     int doing_opts = 1;
-    enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD };
+    enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE };
     struct action {
        int mode;
        char *arg;
@@ -451,14 +502,21 @@ int main(int argc, char **argv)
     time_t now = time(NULL);
     time_t textcutoff = now, htmlnewest = now, htmloldest = now;
     int htmlautoagerange = 1;
-    const char *httpserveraddr = NULL;
-    int httpserverport = 0;
+    const char *httpserveraddr = "localhost";
+    const char *httpserverport = NULL;
     const char *httpauthdata = NULL;
+    const char *outfile = NULL;
+    const char *html_title = PNAME;
     int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
     int progress = 1;
     struct inclusion_exclusion *inex = NULL;
     int ninex = 0, inexsize = 0;
     int crossfs = 0;
+    int depth = -1, gotdepth = 0;
+    int fakediratimes = 1;
+    int mtime = 0;
+    int closeoneof = 1;
+    int showfiles = 0;
 
 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
     {
@@ -598,10 +656,21 @@ int main(int argc, char **argv)
                    usage(stdout);
                    return 0;
                  case OPT_VERSION:
-                   printf("FIXME: version();\n");
+#ifdef PACKAGE_VERSION
+                   printf("%s, revision %s\n", PNAME, PACKAGE_VERSION);
+#else
+                   printf("%s: version number not available when not built"
+                          " via automake\n", PNAME);
+#endif
                    return 0;
                  case OPT_LICENCE:
-                   printf("FIXME: licence();\n");
+                   {
+                       extern const char *const licence[];
+                       int i;
+
+                       for (i = 0; licence[i]; i++)
+                           fputs(licence[i], stdout);
+                   }
                    return 0;
                  case OPT_SCAN:
                    if (nactions >= actionsize) {
@@ -649,12 +718,14 @@ int main(int argc, char **argv)
                    nactions++;
                    break;
                  case OPT_HTML:
+                 case OPT_CGI:
                    if (nactions >= actionsize) {
                        actionsize = nactions * 3 / 2 + 16;
                        actions = sresize(actions, actionsize, struct action);
                    }
                    actions[nactions].mode = HTML;
-                   actions[nactions].arg = optval;
+                   actions[nactions].arg = (optid == OPT_HTML ? optval :
+                                            NULL);
                    nactions++;
                    break;
                  case OPT_HTTPD:
@@ -666,6 +737,15 @@ int main(int argc, char **argv)
                    actions[nactions].arg = NULL;
                    nactions++;
                    break;
+                 case OPT_REMOVE:
+                   if (nactions >= actionsize) {
+                       actionsize = nactions * 3 / 2 + 16;
+                       actions = sresize(actions, actionsize, struct action);
+                   }
+                   actions[nactions].mode = REMOVE;
+                   actions[nactions].arg = NULL;
+                   nactions++;
+                   break;
                  case OPT_PROGRESS:
                    progress = 2;
                    break;
@@ -681,9 +761,42 @@ int main(int argc, char **argv)
                  case OPT_NOCROSSFS:
                    crossfs = 0;
                    break;
+                 case OPT_DIRATIME:
+                   fakediratimes = 0;
+                   break;
+                 case OPT_NODIRATIME:
+                   fakediratimes = 1;
+                   break;
+                 case OPT_SHOWFILES:
+                   showfiles = 1;
+                   break;
+                 case OPT_MTIME:
+                   mtime = 1;
+                   break;
+                  case OPT_NOEOF:
+                    closeoneof = 0;
+                    break;
                  case OPT_DATAFILE:
                    filename = optval;
                    break;
+                 case OPT_DEPTH:
+                   if (!strcasecmp(optval, "unlimited") ||
+                       !strcasecmp(optval, "infinity") ||
+                       !strcasecmp(optval, "infinite") ||
+                       !strcasecmp(optval, "inf") ||
+                       !strcasecmp(optval, "maximum") ||
+                       !strcasecmp(optval, "max"))
+                       depth = -1;
+                   else
+                       depth = atoi(optval);
+                   gotdepth = 1;
+                   break;
+                 case OPT_OUTFILE:
+                   outfile = optval;
+                   break;
+                  case OPT_HTMLTITLE:
+                    html_title = optval;
+                    break;
                  case OPT_MINAGE:
                    textcutoff = parse_age(now, optval);
                    break;
@@ -708,10 +821,13 @@ int main(int argc, char **argv)
                        else
                            port = optval;
                        port += strcspn(port, ":");
-                       if (port)
+                       if (port && *port)
                            *port++ = '\0';
-                       httpserveraddr = optval;
-                       httpserverport = atoi(port);
+                        if (!strcmp(optval, "ANY"))
+                            httpserveraddr = NULL;
+                        else
+                            httpserveraddr = optval;
+                       httpserverport = port;
                    }
                    break;
                  case OPT_AUTH:
@@ -725,7 +841,7 @@ int main(int argc, char **argv)
                        auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
                    else if (!strcmp(optval, "help") ||
                             !strcmp(optval, "list")) {
-                       printf("agedu: supported HTTP authentication types"
+                       printf(PNAME ": supported HTTP authentication types"
                               " are:\n"
                               "       magic      use Linux /proc/net/tcp to"
                               " determine owner of peer socket\n"
@@ -830,11 +946,12 @@ int main(int argc, char **argv)
 
        if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
            const char *scandir = actions[action].arg;
+
            if (mode == LOAD) {
                char *buf = fgetline(stdin);
                unsigned newpathsep;
                buf[strcspn(buf, "\r\n")] = '\0';
-               if (1 != sscanf(buf, "agedu dump file. pathsep=%x",
+               if (1 != sscanf(buf, DUMPHDR "%x",
                                &newpathsep)) {
                    fprintf(stderr, "%s: header in dump file not recognised\n",
                            PNAME);
@@ -848,14 +965,15 @@ int main(int argc, char **argv)
                /*
                 * Prepare to write out the index file.
                 */
-               fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
+               fd = open(filename, O_RDWR | O_TRUNC | O_CREAT,
+                         S_IRUSR | S_IWUSR);
                if (fd < 0) {
                    fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
                            strerror(errno));
                    return 1;
                }
                if (fstat(fd, &st) < 0) {
-                   perror("agedu: fstat");
+                   perror(PNAME ": fstat");
                    return 1;
                }
                ctx->datafile_dev = st.st_dev;
@@ -879,6 +997,8 @@ int main(int argc, char **argv)
            ctx->inex = inex;
            ctx->ninex = ninex;
            ctx->crossfs = crossfs;
+           ctx->fakeatimes = fakediratimes;
+           ctx->usemtime = mtime;
 
            ctx->last_output_update = time(NULL);
 
@@ -888,14 +1008,16 @@ int main(int argc, char **argv)
            ctx->progress = progress;
            {
                struct winsize ws;
-               if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
+               if (progress &&
+                   ioctl(2, TIOCGWINSZ, &ws) == 0 &&
+                   ws.ws_col > 0)
                    ctx->progwidth = ws.ws_col - 1;
                else
                    ctx->progwidth = 79;
            }
 
            if (mode == SCANDUMP)
-               printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep);
+               printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
 
            /*
             * Scan the directory tree, and write out the trie component
@@ -940,6 +1062,7 @@ int main(int argc, char **argv)
                            p++;
                            c = 0;
                            for (i = 0; i < 2; i++) {
+                               c *= 16;
                                if (*p >= '0' && *p <= '9')
                                    c += *p - '0';
                                else if (*p >= 'A' && *p <= 'F')
@@ -952,18 +1075,24 @@ int main(int argc, char **argv)
                                }
                                p++;
                            }
+                       } else {
+                           p++;
                        }
                        *q++ = c;
-                       p++;
                    }
                    *q = '\0';
                    triebuild_add(ctx->tb, buf, &tf);
                    sfree(buf);
+                   line++;
                }
            } else {
-               du(scandir, gotdata, ctx);
+               du(scandir, gotdata, scan_error, ctx);
            }
            if (mode != SCANDUMP) {
+               size_t maxpathlen;
+               size_t delta;
+               char *buf, *prevbuf;
+
                count = triebuild_finish(ctx->tb);
                triebuild_free(ctx->tb);
 
@@ -977,45 +1106,151 @@ int main(int argc, char **argv)
                 * will take; enlarge the file, and memory-map it.
                 */
                if (fstat(fd, &st) < 0) {
-                   perror("agedu: fstat");
+                   perror(PNAME ": fstat");
                    return 1;
                }
 
-               printf("Built pathname index, %d entries, %ju bytes\n", count,
-                      (intmax_t)st.st_size);
+               printf("Built pathname index, %d entries,"
+                      " %llu bytes of index\n", count,
+                      (unsigned long long)st.st_size);
 
-               totalsize = index_compute_size(st.st_size, count);
+               totalsize = index_initial_size(st.st_size, count);
+               totalsize += totalsize / 10;
 
                if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
-                   perror("agedu: lseek");
+                   perror(PNAME ": lseek");
                    return 1;
                }
                if (write(fd, "\0", 1) < 1) {
-                   perror("agedu: write");
+                   perror(PNAME ": write");
                    return 1;
                }
 
-               printf("Upper bound on index file size = %ju bytes\n",
-                      (intmax_t)totalsize);
-
                mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
                if (!mappedfile) {
-                   perror("agedu: mmap");
+                   perror(PNAME ": mmap");
                    return 1;
                }
 
-               ib = indexbuild_new(mappedfile, st.st_size, count);
+               if (fakediratimes) {
+                   printf("Faking directory atimes\n");
+                   trie_fake_dir_atimes(mappedfile);
+               }
+
+               printf("Building index\n");
+               ib = indexbuild_new(mappedfile, st.st_size, count, &delta);
+               maxpathlen = trie_maxpathlen(mappedfile);
+               buf = snewn(maxpathlen, char);
+               prevbuf = snewn(maxpathlen, char);
                tw = triewalk_new(mappedfile);
-               while ((tf = triewalk_next(tw, NULL)) != NULL)
-                   indexbuild_add(ib, tf);
+               prevbuf[0] = '\0';
+               tf = triewalk_next(tw, buf);
+               assert(tf);
+               while (1) {
+                   int i;
+
+                   if (totalsize - indexbuild_realsize(ib) < delta) {
+                       const void *oldfile = mappedfile;
+                       ptrdiff_t diff;
+
+                       /*
+                        * Unmap the file, grow it, and remap it.
+                        */
+                       munmap(mappedfile, totalsize);
+
+                       totalsize += delta;
+                       totalsize += totalsize / 10;
+
+                       if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
+                           perror(PNAME ": lseek");
+                           return 1;
+                       }
+                       if (write(fd, "\0", 1) < 1) {
+                           perror(PNAME ": write");
+                           return 1;
+                       }
+
+                       mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
+                       if (!mappedfile) {
+                           perror(PNAME ": mmap");
+                           return 1;
+                       }
+
+                       indexbuild_rebase(ib, mappedfile);
+                       triewalk_rebase(tw, mappedfile);
+                       diff = (const unsigned char *)mappedfile -
+                           (const unsigned char *)oldfile;
+                       if (tf)
+                           tf = (const struct trie_file *)
+                               (((const unsigned char *)tf) + diff);
+                   }
+
+                   /*
+                    * Get the next file from the index. So we are
+                    * currently holding, and have not yet
+                    * indexed, prevtf (with pathname prevbuf) and
+                    * tf (with pathname buf).
+                    */
+                   prevtf = tf;
+                   memcpy(prevbuf, buf, maxpathlen);
+                   tf = triewalk_next(tw, buf);
+
+                   if (!tf)
+                       buf[0] = '\0';
+
+                   /*
+                    * Find the first differing character position
+                    * between our two pathnames.
+                    */
+                   for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++);
+
+                   /*
+                    * If prevbuf was a directory name and buf is
+                    * something inside that directory, then
+                    * trie_before() will be called on prevbuf
+                    * itself. Hence we must drop a tag before it,
+                    * so that the resulting index is usable.
+                    */
+                   if ((!prevbuf[i] && (buf[i] == pathsep ||
+                                        (i > 0 && buf[i-1] == pathsep))))
+                       indexbuild_tag(ib);
+
+                   /*
+                    * Add prevtf to the index.
+                    */
+                   indexbuild_add(ib, prevtf);
+
+                   if (!tf) {
+                       /*
+                        * Drop an unconditional final tag, and
+                        * get out of this loop.
+                        */
+                       indexbuild_tag(ib);
+                       break;
+                   }
+
+                   /*
+                    * If prevbuf was a filename inside some
+                    * directory which buf is outside, then
+                    * trie_before() will be called on some
+                    * pathname either equal to buf or epsilon
+                    * less than it. Either way, we're going to
+                    * need to drop a tag after prevtf.
+                    */
+                   if (strchr(prevbuf+i, pathsep) || !tf)
+                       indexbuild_tag(ib);
+               }
+
                triewalk_free(tw);
                realsize = indexbuild_realsize(ib);
                indexbuild_free(ib);
 
                munmap(mappedfile, totalsize);
-               ftruncate(fd, realsize);
+               if (ftruncate(fd, realsize) < 0)
+                    fatal("%s: truncate: %s\n", filename, strerror(errno));
                close(fd);
-               printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
+               printf("Final index file size = %llu bytes\n",
+                      (unsigned long long)realsize);
            }
        } else if (mode == TEXT) {
            char *querydir = actions[action].arg;
@@ -1028,15 +1263,20 @@ int main(int argc, char **argv)
                return 1;
            }
            if (fstat(fd, &st) < 0) {
-               perror("agedu: fstat");
+               perror(PNAME ": fstat");
                return 1;
            }
            totalsize = st.st_size;
            mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
            if (!mappedfile) {
-               perror("agedu: mmap");
+               perror(PNAME ": mmap");
                return 1;
            }
+            if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               return 1;
+            }
            pathsep = trie_pathsep(mappedfile);
 
            /*
@@ -1046,10 +1286,28 @@ int main(int argc, char **argv)
            if (pathlen > 0 && querydir[pathlen-1] == pathsep)
                querydir[--pathlen] = '\0';
 
-           text_query(mappedfile, querydir, textcutoff, 1);
+           if (!gotdepth)
+               depth = 1;             /* default for text mode */
+           if (outfile != NULL) {
+               FILE *fp = fopen(outfile, "w");
+               if (!fp) {
+                   fprintf(stderr, "%s: %s: open: %s\n", PNAME,
+                           outfile, strerror(errno));
+                   return 1;
+               }
+               text_query(mappedfile, querydir, textcutoff, showfiles,
+                          depth, fp);
+               fclose(fp);
+           } else {
+               text_query(mappedfile, querydir, textcutoff, showfiles,
+                          depth, stdout);
+           }
+
+           munmap(mappedfile, totalsize);
        } else if (mode == HTML) {
            char *querydir = actions[action].arg;
-           size_t pathlen;
+           size_t pathlen, maxpathlen;
+           char *pathbuf;
            struct html_config cfg;
            unsigned long xi;
            char *html;
@@ -1058,34 +1316,247 @@ int main(int argc, char **argv)
            if (fd < 0) {
                fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
                        strerror(errno));
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
                return 1;
            }
            if (fstat(fd, &st) < 0) {
-               perror("agedu: fstat");
+               fprintf(stderr, "%s: %s: fstat: %s\n", PNAME, filename,
+                       strerror(errno));
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
                return 1;
            }
            totalsize = st.st_size;
            mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
            if (!mappedfile) {
-               perror("agedu: mmap");
+               fprintf(stderr, "%s: %s: mmap: %s\n", PNAME, filename,
+                       strerror(errno));
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
+               return 1;
+           }
+           if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
                return 1;
            }
            pathsep = trie_pathsep(mappedfile);
 
-           /*
-            * Trim trailing slash, just in case.
-            */
-           pathlen = strlen(querydir);
-           if (pathlen > 0 && querydir[pathlen-1] == pathsep)
-               querydir[--pathlen] = '\0';
+           maxpathlen = trie_maxpathlen(mappedfile);
+           pathbuf = snewn(maxpathlen, char);
 
-           xi = trie_before(mappedfile, querydir);
-           cfg.format = NULL;
-           cfg.autoage = htmlautoagerange;
-           cfg.oldest = htmloldest;
-           cfg.newest = htmlnewest;
-           html = html_query(mappedfile, xi, &cfg);
-           fputs(html, stdout);
+           if (!querydir || !gotdepth) {
+               /*
+                * Single output file.
+                */
+               if (!querydir) {
+                    cfg.uriformat = "/%|/%p/%|%|/%p";
+               } else {
+                   cfg.uriformat = NULL;
+               }
+               cfg.autoage = htmlautoagerange;
+               cfg.oldest = htmloldest;
+               cfg.newest = htmlnewest;
+               cfg.showfiles = showfiles;
+           } else {
+                cfg.uriformat = "/index.html%|/%/p.html";
+                cfg.fileformat = "/index.html%|/%/p.html";
+               cfg.autoage = htmlautoagerange;
+               cfg.oldest = htmloldest;
+               cfg.newest = htmlnewest;
+               cfg.showfiles = showfiles;
+           }
+            cfg.html_title = html_title;
+
+           if (!querydir) {
+               /*
+                * If we're run in --cgi mode, read PATH_INFO to get
+                * a numeric pathname index.
+                */
+               char *path_info = getenv("PATH_INFO");
+
+               if (!path_info)
+                   path_info = "";
+
+                /*
+                 * Parse the path.
+                 */
+                if (!html_parse_path(mappedfile, path_info, &cfg, &xi)) {
+                   printf("Status: 404\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>404 Not Found</title>"
+                          "</head><body>"
+                          "<h1>404 Not Found</h1>"
+                          "<p>Invalid <code>agedu</code> pathname."
+                          "</body></html>\n");
+                   return 0;
+               }
+
+               /*
+                * If the path was parseable but not canonically
+                * expressed, return a redirect to the canonical
+                * version.
+                */
+                char *canonpath = html_format_path(mappedfile, &cfg, xi);
+               if (strcmp(canonpath, path_info)) {
+                   char *servername = getenv("SERVER_NAME");
+                   char *scriptname = getenv("SCRIPT_NAME");
+                   if (!servername || !scriptname) {
+                       if (servername)
+                           fprintf(stderr, "%s: SCRIPT_NAME unset\n", PNAME);
+                       else if (scriptname)
+                           fprintf(stderr, "%s: SERVER_NAME unset\n", PNAME);
+                       else
+                           fprintf(stderr, "%s: SERVER_NAME and "
+                                   "SCRIPT_NAME both unset\n", PNAME);
+                       printf("Status: 500\nContent-type: text/html\n\n"
+                              "<html><head>"
+                              "<title>500 Internal Server Error</title>"
+                              "</head><body>"
+                              "<h1>500 Internal Server Error</h1>"
+                              "<p><code>agedu</code> suffered an internal "
+                              "error."
+                              "</body></html>\n");
+                       return 0;
+                   }
+                   printf("Status: 301\n"
+                          "Location: http://%s/%s%s\n"
+                          "Content-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>301 Moved</title>"
+                          "</head><body>"
+                          "<h1>301 Moved</h1>"
+                          "<p>Moved."
+                          "</body></html>\n",
+                          servername, scriptname, canonpath);
+                   return 0;
+               }
+
+           } else {
+               /*
+                * In ordinary --html mode, process a query
+                * directory passed in on the command line.
+                */
+
+               /*
+                * Trim trailing slash, just in case.
+                */
+               pathlen = strlen(querydir);
+               if (pathlen > 0 && querydir[pathlen-1] == pathsep)
+                   querydir[--pathlen] = '\0';
+
+               xi = trie_before(mappedfile, querydir);
+               if (xi >= trie_count(mappedfile) ||
+                   (trie_getpath(mappedfile, xi, pathbuf),
+                    strcmp(pathbuf, querydir))) {
+                   fprintf(stderr, "%s: pathname '%s' does not exist in index\n"
+                           "%*s(check it is spelled exactly as it is in the "
+                           "index, including\n%*sany leading './')\n",
+                           PNAME, querydir,
+                           (int)(1+sizeof(PNAME)), "",
+                           (int)(1+sizeof(PNAME)), "");
+                   return 1;
+               } else if (!index_has_root(mappedfile, xi)) {
+                   fprintf(stderr, "%s: pathname '%s' is"
+                           " a file, not a directory\n", PNAME, querydir);
+                   return 1;
+               }
+           }
+
+           if (!querydir || !gotdepth) {
+               /*
+                * Single output file.
+                */
+               html = html_query(mappedfile, xi, &cfg, 1);
+               if (querydir && outfile != NULL) {
+                   FILE *fp = fopen(outfile, "w");
+                   if (!fp) {
+                       fprintf(stderr, "%s: %s: open: %s\n", PNAME,
+                               outfile, strerror(errno));
+                       return 1;
+                   } else if (fputs(html, fp) < 0) {
+                       fprintf(stderr, "%s: %s: write: %s\n", PNAME,
+                               outfile, strerror(errno));
+                       fclose(fp);
+                       return 1;
+                   } else if (fclose(fp) < 0) {
+                       fprintf(stderr, "%s: %s: fclose: %s\n", PNAME,
+                               outfile, strerror(errno));
+                       return 1;
+                   }
+               } else {
+                   if (!querydir) {
+                       printf("Content-type: text/html\n\n");
+                   }
+                   fputs(html, stdout);
+               }
+           } else {
+               /*
+                * Multiple output files.
+                */
+               int dirlen = outfile ? 2+strlen(outfile) : 3;
+               char prefix[dirlen];
+               if (outfile) {
+                   if (mkdir(outfile, 0777) < 0 && errno != EEXIST) {
+                       fprintf(stderr, "%s: %s: mkdir: %s\n", PNAME,
+                               outfile, strerror(errno));
+                       return 1;
+                   }
+                   snprintf(prefix, dirlen, "%s/", outfile);
+               } else
+                   snprintf(prefix, dirlen, "./");
+
+               unsigned long xi2;
+               /*
+                * pathbuf is only set up in the plain-HTML case and
+                * not in the CGI case; but that's OK, because the
+                * CGI case can't come to this branch of the if
+                * anyway.
+                */
+               make_successor(pathbuf);
+               xi2 = trie_before(mappedfile, pathbuf);
+
+               if (html_dump(mappedfile, xi, xi2, depth, &cfg, prefix))
+                   return 1;
+           }
+
+           munmap(mappedfile, totalsize);
+           sfree(pathbuf);
        } else if (mode == DUMP) {
            size_t maxpathlen;
            char *buf;
@@ -1097,25 +1568,32 @@ int main(int argc, char **argv)
                return 1;
            }
            if (fstat(fd, &st) < 0) {
-               perror("agedu: fstat");
+               perror(PNAME ": fstat");
                return 1;
            }
            totalsize = st.st_size;
            mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
            if (!mappedfile) {
-               perror("agedu: mmap");
+               perror(PNAME ": mmap");
                return 1;
            }
+            if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               return 1;
+            }
            pathsep = trie_pathsep(mappedfile);
 
            maxpathlen = trie_maxpathlen(mappedfile);
            buf = snewn(maxpathlen, char);
 
-           printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep);
+           printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
            tw = triewalk_new(mappedfile);
            while ((tf = triewalk_next(tw, buf)) != NULL)
                dump_line(buf, tf);
            triewalk_free(tw);
+
+           munmap(mappedfile, totalsize);
        } else if (mode == HTTPD) {
            struct html_config pcfg;
            struct httpd_config dcfg;
@@ -1127,25 +1605,40 @@ int main(int argc, char **argv)
                return 1;
            }
            if (fstat(fd, &st) < 0) {
-               perror("agedu: fstat");
+               perror(PNAME ": fstat");
                return 1;
            }
            totalsize = st.st_size;
            mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
            if (!mappedfile) {
-               perror("agedu: mmap");
+               perror(PNAME ": mmap");
                return 1;
            }
+            if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               return 1;
+            }
            pathsep = trie_pathsep(mappedfile);
 
            dcfg.address = httpserveraddr;
            dcfg.port = httpserverport;
+           dcfg.closeoneof = closeoneof;
            dcfg.basicauthdata = httpauthdata;
-           pcfg.format = NULL;
+           pcfg.uriformat = "/%|/%p/%|%|/%p";
            pcfg.autoage = htmlautoagerange;
            pcfg.oldest = htmloldest;
            pcfg.newest = htmlnewest;
+           pcfg.showfiles = showfiles;
+            pcfg.html_title = html_title;
            run_httpd(mappedfile, auth, &dcfg, &pcfg);
+           munmap(mappedfile, totalsize);
+       } else if (mode == REMOVE) {
+           if (remove(filename) < 0) {
+               fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename,
+                       strerror(errno));
+               return 1;
+           }
        }
     }