About time I wrote up the data structure used in agedu, since it's
[sgt/agedu] / agedu.c
diff --git a/agedu.c b/agedu.c
index c1ed291..b253f18 100644 (file)
--- a/agedu.c
+++ b/agedu.c
@@ -56,14 +56,18 @@ struct ctx {
 static void dump_line(const char *pathname, const struct trie_file *tf)
 {
     const char *p;
-    printf("%llu %llu ", tf->size, tf->atime);
+    if (printf("%llu %llu ", tf->size, tf->atime) < 0) goto error;
     for (p = pathname; *p; p++) {
-       if (*p >= ' ' && *p < 127 && *p != '%')
-           putchar(*p);
-       else
-           printf("%%%02x", (unsigned char)*p);
+       if (*p >= ' ' && *p < 127 && *p != '%') {
+           if (putchar(*p) == EOF) goto error;
+       } else {
+           if (printf("%%%02x", (unsigned char)*p) < 0) goto error;
+       }
     }
-    putchar('\n');
+    if (putchar('\n') == EOF) goto error;
+    return;
+    error:
+    fatal("standard output: %s", strerror(errno));
 }
 
 static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
@@ -90,7 +94,7 @@ static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
     if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode)))
        file.atime = st->st_mtime;
     else
-       file.atime = st->st_atime;
+       file.atime = max(st->st_mtime, st->st_atime);
 
     /*
      * Filter based on wildcards.
@@ -159,12 +163,12 @@ static void scan_error(void *vctx, const char *fmt, ...)
 }
 
 static void text_query(const void *mappedfile, const char *querydir,
-                      time_t t, int depth)
+                      time_t t, int showfiles, int depth)
 {
     size_t maxpathlen;
     char *pathbuf;
     unsigned long xi1, xi2;
-    unsigned long long s1, s2;
+    unsigned long long size;
 
     maxpathlen = trie_maxpathlen(mappedfile);
     pathbuf = snewn(maxpathlen + 1, char);
@@ -179,16 +183,32 @@ static void text_query(const void *mappedfile, const char *querydir,
     xi1 = trie_before(mappedfile, querydir);
     xi2 = trie_before(mappedfile, pathbuf);
 
-    if (xi2 - xi1 == 1)
+    if (!showfiles && xi2 - xi1 == 1)
        return;                        /* file, or empty dir => no display */
 
     /*
      * Now do the lookups in the age index.
      */
-    s1 = index_query(mappedfile, xi1, t);
-    s2 = index_query(mappedfile, xi2, t);
+    if (xi2 - xi1 == 1) {
+       /*
+        * We are querying an individual file, so we should not
+        * depend on the index entries either side of the node,
+        * since they almost certainly don't both exist. Instead,
+        * just look up the file's size and atime in the main trie.
+        */
+       const struct trie_file *f = trie_getfile(mappedfile, xi1);
+       if (f->atime < t)
+           size = f->size;
+       else
+           size = 0;
+    } else {
+       unsigned long long s1, s2;
+       s1 = index_query(mappedfile, xi1, t);
+       s2 = index_query(mappedfile, xi2, t);
+       size = s2 - s1;
+    }
 
-    if (s1 == s2)
+    if (size == 0)
        return;                        /* no space taken up => no display */
 
     if (depth > 0) {
@@ -199,14 +219,14 @@ static void text_query(const void *mappedfile, const char *querydir,
        xi1++;
        while (xi1 < xi2) {
            trie_getpath(mappedfile, xi1, pathbuf);
-           text_query(mappedfile, pathbuf, t, depth-1);
+           text_query(mappedfile, pathbuf, t, showfiles, depth-1);
            make_successor(pathbuf);
            xi1 = trie_before(mappedfile, pathbuf);
        }
     }
 
     /* Display in units of 1Kb */
-    printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
+    printf("%-11llu %s\n", (size) / 1024, querydir);
 }
 
 /*
@@ -320,8 +340,8 @@ static void text_query(const void *mappedfile, const char *querydir,
         HELPOPT("[--scan,--load] fake atimes on directories") \
     NOVAL(MTIME) LONG(mtime) \
         HELPOPT("[--scan] use mtime instead of atime") \
-    NOVAL(FULL) LONG(full_index) \
-        HELPOPT("[--scan] index every file individually") \
+    NOVAL(SHOWFILES) LONG(files) \
+        HELPOPT("[--web,--html,--text] list individual files") \
     VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
         HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \
     VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
@@ -484,7 +504,7 @@ int main(int argc, char **argv)
     int tqdepth = 1;
     int fakediratimes = 1;
     int mtime = 0;
-    int fullindex = 0;
+    int showfiles = 0;
 
 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
     {
@@ -735,12 +755,12 @@ int main(int argc, char **argv)
                  case OPT_NODIRATIME:
                    fakediratimes = 1;
                    break;
+                 case OPT_SHOWFILES:
+                   showfiles = 1;
+                   break;
                  case OPT_MTIME:
                    mtime = 1;
                    break;
-                 case OPT_FULL:
-                   fullindex = 1;
-                   break;
                  case OPT_DATAFILE:
                    filename = optval;
                    break;
@@ -955,7 +975,9 @@ int main(int argc, char **argv)
            ctx->progress = progress;
            {
                struct winsize ws;
-               if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
+               if (progress &&
+                   ioctl(2, TIOCGWINSZ, &ws) == 0 &&
+                   ws.ws_col > 0)
                    ctx->progwidth = ws.ws_col - 1;
                else
                    ctx->progwidth = 79;
@@ -1020,9 +1042,10 @@ int main(int argc, char **argv)
                                }
                                p++;
                            }
+                       } else {
+                           p++;
                        }
                        *q++ = c;
-                       p++;
                    }
                    *q = '\0';
                    triebuild_add(ctx->tb, buf, &tf);
@@ -1034,6 +1057,7 @@ int main(int argc, char **argv)
            }
            if (mode != SCANDUMP) {
                size_t maxpathlen;
+               size_t delta;
                char *buf, *prevbuf;
 
                count = triebuild_finish(ctx->tb);
@@ -1053,10 +1077,12 @@ int main(int argc, char **argv)
                    return 1;
                }
 
-               printf("Built pathname index, %d entries, %llu bytes\n", count,
+               printf("Built pathname index, %d entries,"
+                      " %llu bytes of index\n", count,
                       (unsigned long long)st.st_size);
 
-               totalsize = index_compute_size(st.st_size, count);
+               totalsize = index_initial_size(st.st_size, count);
+               totalsize += totalsize / 10;
 
                if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
                    perror(PNAME ": lseek");
@@ -1067,9 +1093,6 @@ int main(int argc, char **argv)
                    return 1;
                }
 
-               printf("Upper bound on index file size = %llu bytes\n",
-                      (unsigned long long)totalsize);
-
                mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
                if (!mappedfile) {
                    perror(PNAME ": mmap");
@@ -1082,7 +1105,7 @@ int main(int argc, char **argv)
                }
 
                printf("Building index\n");
-               ib = indexbuild_new(mappedfile, st.st_size, count);
+               ib = indexbuild_new(mappedfile, st.st_size, count, &delta);
                maxpathlen = trie_maxpathlen(mappedfile);
                buf = snewn(maxpathlen, char);
                prevbuf = snewn(maxpathlen, char);
@@ -1093,6 +1116,45 @@ int main(int argc, char **argv)
                while (1) {
                    int i;
 
+                   if (totalsize - indexbuild_realsize(ib) < delta) {
+                       const void *oldfile = mappedfile;
+                       ptrdiff_t diff;
+
+                       /*
+                        * Unmap the file, grow it, and remap it.
+                        */
+                       munmap(mappedfile, totalsize);
+
+                       totalsize += delta;
+                       totalsize += totalsize / 10;
+
+                       if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
+                           perror(PNAME ": lseek");
+                           return 1;
+                       }
+                       if (write(fd, "\0", 1) < 1) {
+                           perror(PNAME ": write");
+                           return 1;
+                       }
+
+                       mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
+                       if (!mappedfile) {
+                           perror(PNAME ": mmap");
+                           return 1;
+                       }
+
+                       indexbuild_rebase(ib, mappedfile);
+                       triewalk_rebase(tw, mappedfile);
+                       diff = (const unsigned char *)mappedfile -
+                           (const unsigned char *)oldfile;
+                       if (prevtf)
+                           prevtf = (const struct trie_file *)
+                               (((const unsigned char *)prevtf) + diff);
+                       if (tf)
+                           tf = (const struct trie_file *)
+                               (((const unsigned char *)tf) + diff);
+                   }
+
                    /*
                     * Get the next file from the index. So we are
                     * currently holding, and have not yet
@@ -1136,12 +1198,6 @@ int main(int argc, char **argv)
                        indexbuild_tag(ib);
                        break;
                    }
-                       
-                   /*
-                    * In full-index mode, index everything.
-                    */
-                   if (fullindex)
-                       indexbuild_tag(ib);
 
                    /*
                     * If prevbuf was a filename inside some
@@ -1162,7 +1218,7 @@ int main(int argc, char **argv)
                munmap(mappedfile, totalsize);
                ftruncate(fd, realsize);
                close(fd);
-               printf("Actual index file size = %llu bytes\n",
+               printf("Final index file size = %llu bytes\n",
                       (unsigned long long)realsize);
            }
        } else if (mode == TEXT) {
@@ -1194,12 +1250,13 @@ int main(int argc, char **argv)
            if (pathlen > 0 && querydir[pathlen-1] == pathsep)
                querydir[--pathlen] = '\0';
 
-           text_query(mappedfile, querydir, textcutoff, tqdepth);
+           text_query(mappedfile, querydir, textcutoff, showfiles, tqdepth);
 
            munmap(mappedfile, totalsize);
        } else if (mode == HTML) {
            char *querydir = actions[action].arg;
-           size_t pathlen;
+           size_t pathlen, maxpathlen;
+           char *pathbuf;
            struct html_config cfg;
            unsigned long xi;
            char *html;
@@ -1222,6 +1279,9 @@ int main(int argc, char **argv)
            }
            pathsep = trie_pathsep(mappedfile);
 
+           maxpathlen = trie_maxpathlen(mappedfile);
+           pathbuf = snewn(maxpathlen, char);
+
            /*
             * Trim trailing slash, just in case.
             */
@@ -1230,14 +1290,30 @@ int main(int argc, char **argv)
                querydir[--pathlen] = '\0';
 
            xi = trie_before(mappedfile, querydir);
-           cfg.format = NULL;
-           cfg.autoage = htmlautoagerange;
-           cfg.oldest = htmloldest;
-           cfg.newest = htmlnewest;
-           html = html_query(mappedfile, xi, &cfg);
-           fputs(html, stdout);
+           if (xi >= trie_count(mappedfile) ||
+               (trie_getpath(mappedfile, xi, pathbuf),
+                strcmp(pathbuf, querydir))) {
+               fprintf(stderr, "%s: pathname '%s' does not exist in index\n"
+                       "%*s(check it is spelled exactly as it is in the "
+                       "index, including\n%*sany leading './')\n",
+                       PNAME, querydir,
+                       (int)(1+sizeof(PNAME)), "",
+                       (int)(1+sizeof(PNAME)), "");
+           } else if (!index_has_root(mappedfile, xi)) {
+               fprintf(stderr, "%s: pathname '%s' is"
+                       " a file, not a directory\n", PNAME, querydir);
+           } else {
+               cfg.format = NULL;
+               cfg.autoage = htmlautoagerange;
+               cfg.oldest = htmloldest;
+               cfg.newest = htmlnewest;
+               cfg.showfiles = showfiles;
+               html = html_query(mappedfile, xi, &cfg);
+               fputs(html, stdout);
+           }
 
            munmap(mappedfile, totalsize);
+           sfree(pathbuf);
        } else if (mode == DUMP) {
            size_t maxpathlen;
            char *buf;
@@ -1299,6 +1375,7 @@ int main(int argc, char **argv)
            pcfg.autoage = htmlautoagerange;
            pcfg.oldest = htmloldest;
            pcfg.newest = htmlnewest;
+           pcfg.showfiles = showfiles;
            run_httpd(mappedfile, auth, &dcfg, &pcfg);
            munmap(mappedfile, totalsize);
        } else if (mode == REMOVE) {