X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/blobdiff_plain/cc7db507cc53258e23c148c690c9e450214f93ac..c1065ef6db11830ffb544b5ffabe6af3e59688f6:/agedu.c diff --git a/agedu.c b/agedu.c index 76e884c..b253f18 100644 --- a/agedu.c +++ b/agedu.c @@ -56,14 +56,18 @@ struct ctx { static void dump_line(const char *pathname, const struct trie_file *tf) { const char *p; - printf("%llu %llu ", tf->size, tf->atime); + if (printf("%llu %llu ", tf->size, tf->atime) < 0) goto error; for (p = pathname; *p; p++) { - if (*p >= ' ' && *p < 127 && *p != '%') - putchar(*p); - else - printf("%%%02x", (unsigned char)*p); + if (*p >= ' ' && *p < 127 && *p != '%') { + if (putchar(*p) == EOF) goto error; + } else { + if (printf("%%%02x", (unsigned char)*p) < 0) goto error; + } } - putchar('\n'); + if (putchar('\n') == EOF) goto error; + return; + error: + fatal("standard output: %s", strerror(errno)); } static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) @@ -90,7 +94,7 @@ static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode))) file.atime = st->st_mtime; else - file.atime = st->st_atime; + file.atime = max(st->st_mtime, st->st_atime); /* * Filter based on wildcards. @@ -140,13 +144,31 @@ static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) return 1; } +static void scan_error(void *vctx, const char *fmt, ...) +{ + struct ctx *ctx = (struct ctx *)vctx; + va_list ap; + + if (ctx->progress) { + fprintf(stderr, "%-*s\r", ctx->progwidth, ""); + fflush(stderr); + } + + fprintf(stderr, "%s: ", PNAME); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + ctx->last_output_update--; /* force a progress report next time */ +} + static void text_query(const void *mappedfile, const char *querydir, - time_t t, int depth) + time_t t, int showfiles, int depth) { size_t maxpathlen; char *pathbuf; unsigned long xi1, xi2; - unsigned long long s1, s2; + unsigned long long size; maxpathlen = trie_maxpathlen(mappedfile); pathbuf = snewn(maxpathlen + 1, char); @@ -161,16 +183,32 @@ static void text_query(const void *mappedfile, const char *querydir, xi1 = trie_before(mappedfile, querydir); xi2 = trie_before(mappedfile, pathbuf); - if (xi2 - xi1 == 1) + if (!showfiles && xi2 - xi1 == 1) return; /* file, or empty dir => no display */ /* * Now do the lookups in the age index. */ - s1 = index_query(mappedfile, xi1, t); - s2 = index_query(mappedfile, xi2, t); + if (xi2 - xi1 == 1) { + /* + * We are querying an individual file, so we should not + * depend on the index entries either side of the node, + * since they almost certainly don't both exist. Instead, + * just look up the file's size and atime in the main trie. + */ + const struct trie_file *f = trie_getfile(mappedfile, xi1); + if (f->atime < t) + size = f->size; + else + size = 0; + } else { + unsigned long long s1, s2; + s1 = index_query(mappedfile, xi1, t); + s2 = index_query(mappedfile, xi2, t); + size = s2 - s1; + } - if (s1 == s2) + if (size == 0) return; /* no space taken up => no display */ if (depth > 0) { @@ -181,14 +219,14 @@ static void text_query(const void *mappedfile, const char *querydir, xi1++; while (xi1 < xi2) { trie_getpath(mappedfile, xi1, pathbuf); - text_query(mappedfile, pathbuf, t, depth-1); + text_query(mappedfile, pathbuf, t, showfiles, depth-1); make_successor(pathbuf); xi1 = trie_before(mappedfile, pathbuf); } } /* Display in units of 1Kb */ - printf("%-11llu %s\n", (s2 - s1) / 1024, querydir); + printf("%-11llu %s\n", (size) / 1024, querydir); } /* @@ -302,6 +340,8 @@ static void text_query(const void *mappedfile, const char *querydir, HELPOPT("[--scan,--load] fake atimes on directories") \ NOVAL(MTIME) LONG(mtime) \ HELPOPT("[--scan] use mtime instead of atime") \ + NOVAL(SHOWFILES) LONG(files) \ + HELPOPT("[--web,--html,--text] list individual files") \ VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \ HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \ VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \ @@ -441,7 +481,7 @@ int main(int argc, char **argv) void *mappedfile; triewalk *tw; indexbuild *ib; - const struct trie_file *tf; + const struct trie_file *tf, *prevtf; char *filename = PNAME ".dat"; int doing_opts = 1; enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE }; @@ -464,6 +504,7 @@ int main(int argc, char **argv) int tqdepth = 1; int fakediratimes = 1; int mtime = 0; + int showfiles = 0; #ifdef DEBUG_MAD_OPTION_PARSING_MACROS { @@ -714,6 +755,9 @@ int main(int argc, char **argv) case OPT_NODIRATIME: fakediratimes = 1; break; + case OPT_SHOWFILES: + showfiles = 1; + break; case OPT_MTIME: mtime = 1; break; @@ -869,6 +913,7 @@ int main(int argc, char **argv) if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { const char *scandir = actions[action].arg; + if (mode == LOAD) { char *buf = fgetline(stdin); unsigned newpathsep; @@ -930,7 +975,9 @@ int main(int argc, char **argv) ctx->progress = progress; { struct winsize ws; - if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0) + if (progress && + ioctl(2, TIOCGWINSZ, &ws) == 0 && + ws.ws_col > 0) ctx->progwidth = ws.ws_col - 1; else ctx->progwidth = 79; @@ -995,9 +1042,10 @@ int main(int argc, char **argv) } p++; } + } else { + p++; } *q++ = c; - p++; } *q = '\0'; triebuild_add(ctx->tb, buf, &tf); @@ -1005,9 +1053,13 @@ int main(int argc, char **argv) line++; } } else { - du(scandir, gotdata, ctx); + du(scandir, gotdata, scan_error, ctx); } if (mode != SCANDUMP) { + size_t maxpathlen; + size_t delta; + char *buf, *prevbuf; + count = triebuild_finish(ctx->tb); triebuild_free(ctx->tb); @@ -1025,10 +1077,12 @@ int main(int argc, char **argv) return 1; } - printf("Built pathname index, %d entries, %llu bytes\n", count, + printf("Built pathname index, %d entries," + " %llu bytes of index\n", count, (unsigned long long)st.st_size); - totalsize = index_compute_size(st.st_size, count); + totalsize = index_initial_size(st.st_size, count); + totalsize += totalsize / 10; if (lseek(fd, totalsize-1, SEEK_SET) < 0) { perror(PNAME ": lseek"); @@ -1039,9 +1093,6 @@ int main(int argc, char **argv) return 1; } - printf("Upper bound on index file size = %llu bytes\n", - (unsigned long long)totalsize); - mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); if (!mappedfile) { perror(PNAME ": mmap"); @@ -1054,10 +1105,112 @@ int main(int argc, char **argv) } printf("Building index\n"); - ib = indexbuild_new(mappedfile, st.st_size, count); + ib = indexbuild_new(mappedfile, st.st_size, count, &delta); + maxpathlen = trie_maxpathlen(mappedfile); + buf = snewn(maxpathlen, char); + prevbuf = snewn(maxpathlen, char); tw = triewalk_new(mappedfile); - while ((tf = triewalk_next(tw, NULL)) != NULL) - indexbuild_add(ib, tf); + prevbuf[0] = '\0'; + tf = triewalk_next(tw, buf); + assert(tf); + while (1) { + int i; + + if (totalsize - indexbuild_realsize(ib) < delta) { + const void *oldfile = mappedfile; + ptrdiff_t diff; + + /* + * Unmap the file, grow it, and remap it. + */ + munmap(mappedfile, totalsize); + + totalsize += delta; + totalsize += totalsize / 10; + + if (lseek(fd, totalsize-1, SEEK_SET) < 0) { + perror(PNAME ": lseek"); + return 1; + } + if (write(fd, "\0", 1) < 1) { + perror(PNAME ": write"); + return 1; + } + + mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); + if (!mappedfile) { + perror(PNAME ": mmap"); + return 1; + } + + indexbuild_rebase(ib, mappedfile); + triewalk_rebase(tw, mappedfile); + diff = (const unsigned char *)mappedfile - + (const unsigned char *)oldfile; + if (prevtf) + prevtf = (const struct trie_file *) + (((const unsigned char *)prevtf) + diff); + if (tf) + tf = (const struct trie_file *) + (((const unsigned char *)tf) + diff); + } + + /* + * Get the next file from the index. So we are + * currently holding, and have not yet + * indexed, prevtf (with pathname prevbuf) and + * tf (with pathname buf). + */ + prevtf = tf; + memcpy(prevbuf, buf, maxpathlen); + tf = triewalk_next(tw, buf); + + if (!tf) + buf[0] = '\0'; + + /* + * Find the first differing character position + * between our two pathnames. + */ + for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++); + + /* + * If prevbuf was a directory name and buf is + * something inside that directory, then + * trie_before() will be called on prevbuf + * itself. Hence we must drop a tag before it, + * so that the resulting index is usable. + */ + if ((!prevbuf[i] && (buf[i] == pathsep || + (i > 0 && buf[i-1] == pathsep)))) + indexbuild_tag(ib); + + /* + * Add prevtf to the index. + */ + indexbuild_add(ib, prevtf); + + if (!tf) { + /* + * Drop an unconditional final tag, and + * get out of this loop. + */ + indexbuild_tag(ib); + break; + } + + /* + * If prevbuf was a filename inside some + * directory which buf is outside, then + * trie_before() will be called on some + * pathname either equal to buf or epsilon + * less than it. Either way, we're going to + * need to drop a tag after prevtf. + */ + if (strchr(prevbuf+i, pathsep) || !tf) + indexbuild_tag(ib); + } + triewalk_free(tw); realsize = indexbuild_realsize(ib); indexbuild_free(ib); @@ -1065,7 +1218,7 @@ int main(int argc, char **argv) munmap(mappedfile, totalsize); ftruncate(fd, realsize); close(fd); - printf("Actual index file size = %llu bytes\n", + printf("Final index file size = %llu bytes\n", (unsigned long long)realsize); } } else if (mode == TEXT) { @@ -1097,12 +1250,13 @@ int main(int argc, char **argv) if (pathlen > 0 && querydir[pathlen-1] == pathsep) querydir[--pathlen] = '\0'; - text_query(mappedfile, querydir, textcutoff, tqdepth); + text_query(mappedfile, querydir, textcutoff, showfiles, tqdepth); munmap(mappedfile, totalsize); } else if (mode == HTML) { char *querydir = actions[action].arg; - size_t pathlen; + size_t pathlen, maxpathlen; + char *pathbuf; struct html_config cfg; unsigned long xi; char *html; @@ -1125,6 +1279,9 @@ int main(int argc, char **argv) } pathsep = trie_pathsep(mappedfile); + maxpathlen = trie_maxpathlen(mappedfile); + pathbuf = snewn(maxpathlen, char); + /* * Trim trailing slash, just in case. */ @@ -1133,14 +1290,30 @@ int main(int argc, char **argv) querydir[--pathlen] = '\0'; xi = trie_before(mappedfile, querydir); - cfg.format = NULL; - cfg.autoage = htmlautoagerange; - cfg.oldest = htmloldest; - cfg.newest = htmlnewest; - html = html_query(mappedfile, xi, &cfg); - fputs(html, stdout); + if (xi >= trie_count(mappedfile) || + (trie_getpath(mappedfile, xi, pathbuf), + strcmp(pathbuf, querydir))) { + fprintf(stderr, "%s: pathname '%s' does not exist in index\n" + "%*s(check it is spelled exactly as it is in the " + "index, including\n%*sany leading './')\n", + PNAME, querydir, + (int)(1+sizeof(PNAME)), "", + (int)(1+sizeof(PNAME)), ""); + } else if (!index_has_root(mappedfile, xi)) { + fprintf(stderr, "%s: pathname '%s' is" + " a file, not a directory\n", PNAME, querydir); + } else { + cfg.format = NULL; + cfg.autoage = htmlautoagerange; + cfg.oldest = htmloldest; + cfg.newest = htmlnewest; + cfg.showfiles = showfiles; + html = html_query(mappedfile, xi, &cfg); + fputs(html, stdout); + } munmap(mappedfile, totalsize); + sfree(pathbuf); } else if (mode == DUMP) { size_t maxpathlen; char *buf; @@ -1202,6 +1375,7 @@ int main(int argc, char **argv) pcfg.autoage = htmlautoagerange; pcfg.oldest = htmloldest; pcfg.newest = htmlnewest; + pcfg.showfiles = showfiles; run_httpd(mappedfile, auth, &dcfg, &pcfg); munmap(mappedfile, totalsize); } else if (mode == REMOVE) {