From 9d0b959602cf1ca55fa011e4143ab4c28914a2d7 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 30 Oct 2008 20:08:07 +0000 Subject: [PATCH] Additional options to control the disk scanning: wildcard-based includes and excludes, control of the cross-fs-boundary prohibition. git-svn-id: svn://svn.tartarus.org/sgt/agedu@8230 cda61777-01e9-0310-a592-d414129be87e --- TODO | 27 +++++++++++++++------------ agedu.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/TODO b/TODO index 6df0eb3..f4f4f0f 100644 --- a/TODO +++ b/TODO @@ -14,23 +14,24 @@ Before it's non-embarrassingly releasable: latter by at least some means which doesn't involve it showing up in "ps" - - do some configurability for the disk scan - * wildcard-based includes and excludes - + wildcards can act on the last pathname component or the - whole lot - + include and exclude can be interleaved; implicit "include - *" before any - * reinstate filesystem crossing, though not doing so should - remain the default - - - work out what to do about atimes on directories + - work out what to do about atimes on directories in the absence of + the Linux syscall magic * one option is to read them during the scan and reinstate them after each recursion pop. Race-condition prone. - * marking them in a distinctive colour in the reports is the - other option. + * marking them in a distinctive colour in the reports is another + option. + * a third option is simply to ignore space taken up by + directories in the first place; inaccurate but terribly simple. + * incidentally, sometimes open(...,O_NOATIME) will fail, and + then we have to fall back to ordinary open. Be prepared to do + this, which probably means getting rid of the icky macro + hackery in du.c and turning it into a more sensible run-time + abstraction layer. - make a final decision on the name! + - man page, licence, online help. + Future directions: - run-time configuration in the HTTP server @@ -92,6 +93,8 @@ Future directions: * configure use of /proc/net/tcp * configure use of /dev/random * configure use of Linux syscall magic replacing readdir + + later glibcs have fdopendir, hooray! So we can use that + too, if it's available and O_NOATIME is too. * what do we do elsewhere about _GNU_SOURCE? + http://msdn.microsoft.com/en-us/library/ms724290.aspx suggest modern Windowses support atime-equivalents, so a Windows port diff --git a/agedu.c b/agedu.c index c1955b4..c0e1bc0 100644 --- a/agedu.c +++ b/agedu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "du.h" #include "trie.h" @@ -38,12 +39,21 @@ void fatal(const char *fmt, ...) exit(1); } +struct inclusion_exclusion { + int include; + const char *wildcard; + int path; +}; + struct ctx { triebuild *tb; dev_t datafile_dev, filesystem_dev; ino_t datafile_ino; time_t last_output_update; int progress, progwidth; + struct inclusion_exclusion *inex; + int ninex; + int crossfs; }; static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) @@ -51,6 +61,8 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) struct ctx *ctx = (struct ctx *)vctx; struct trie_file file; time_t t; + int i, include; + const char *filename; /* * Filter out our own data file. @@ -60,15 +72,27 @@ static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) /* * Don't cross the streams^W^Wany file system boundary. - * (FIXME: this should be a configurable option.) */ - if (st->st_dev != ctx->filesystem_dev) + if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev) return 0; /* - * FIXME: other filtering in gotdata will be needed, when we - * implement serious filtering. + * Filter based on wildcards. */ + include = 1; + filename = strrchr(pathname, '/'); + if (!filename) + filename = pathname; + else + filename++; + for (i = 0; i < ctx->ninex; i++) { + if (fnmatch(ctx->inex[i].wildcard, + ctx->inex[i].path ? pathname : filename, + FNM_PATHNAME) == 0) + include = ctx->inex[i].include; + } + if (!include) + return 1; /* filter, but don't prune */ file.blocks = st->st_blocks; file.atime = st->st_atime; @@ -148,6 +172,9 @@ int main(int argc, char **argv) char *minage = "0d"; int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; int progress = 1; + struct inclusion_exclusion *inex = NULL; + int ninex = 0, inexsize = 0; + int crossfs = 0; while (--argc > 0) { char *p = *++argv; @@ -190,6 +217,10 @@ int main(int argc, char **argv) !strcmp(p, "--progress-tty") || !strcmp(p, "--scan-progress-tty")) { progress = 1; + } else if (!strcmp(p, "--crossfs")) { + crossfs = 1; + } else if (!strcmp(p, "--no-crossfs")) { + crossfs = 0; } else if (!strcmp(p, "--file") || !strcmp(p, "--auth") || !strcmp(p, "--http-auth") || @@ -197,7 +228,11 @@ int main(int argc, char **argv) !strcmp(p, "--server-auth") || !strcmp(p, "--minimum-age") || !strcmp(p, "--min-age") || - !strcmp(p, "--age")) { + !strcmp(p, "--age") || + !strcmp(p, "--include") || + !strcmp(p, "--include-path") || + !strcmp(p, "--exclude") || + !strcmp(p, "--exclude-path")) { /* * Long options requiring values. */ @@ -235,6 +270,21 @@ int main(int argc, char **argv) PNAME, optval, (int)strlen(PNAME), ""); return 1; } + } else if (!strcmp(p, "--include") || + !strcmp(p, "--include-path") || + !strcmp(p, "--exclude") || + !strcmp(p, "--exclude-path")) { + if (ninex >= inexsize) { + inexsize = ninex * 3 / 2 + 16; + inex = sresize(inex, inexsize, + struct inclusion_exclusion); + } + inex[ninex].path = (!strcmp(p, "--include-path") || + !strcmp(p, "--exclude-path")); + inex[ninex].include = (!strcmp(p, "--include") || + !strcmp(p, "--include-path")); + inex[ninex].wildcard = optval; + ninex++; } } else { fprintf(stderr, "%s: unrecognised option '%s'\n", @@ -306,7 +356,7 @@ int main(int argc, char **argv) strerror(errno)); return 1; } - ctx->filesystem_dev = st.st_dev; + ctx->filesystem_dev = crossfs ? 0 : st.st_dev; if (fstat(fd, &st) < 0) { perror("agedu: fstat"); @@ -314,6 +364,9 @@ int main(int argc, char **argv) } ctx->datafile_dev = st.st_dev; ctx->datafile_ino = st.st_ino; + ctx->inex = inex; + ctx->ninex = ninex; + ctx->crossfs = crossfs; ctx->last_output_update = time(NULL); -- 2.11.0