From f2e528936aac5fb2f68693ce8cda348c2a7c1a91 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 1 Nov 2008 14:49:32 +0000 Subject: [PATCH] Configurable age range represented by the colour coding in the HTML output. git-svn-id: svn://svn.tartarus.org/sgt/agedu@8239 cda61777-01e9-0310-a592-d414129be87e --- TODO | 149 +++++++++++++++++++++++++++++++++++----------------------------- agedu.c | 124 ++++++++++++++++++++++++++++++++++++----------------- html.c | 28 +++++++----- html.h | 30 ++++++++++--- httpd.c | 12 ++++-- httpd.h | 2 +- 6 files changed, 217 insertions(+), 128 deletions(-) diff --git a/TODO b/TODO index f4f4f0f..07a84f1 100644 --- a/TODO +++ b/TODO @@ -3,16 +3,19 @@ TODO list for agedu Before it's non-embarrassingly releasable: - - sort out the command line syntax - * I think there should be a unified --mode / -M for every - running mode, possibly without the one-letter option for the - diagnostic sorts of things - * there should be some configurable options: - + range limits on the age display - + server address in httpd mode - + HTTP authentication: specify username and/or password, the - latter by at least some means which doesn't involve it - showing up in "ps" + - add more configurable options + + server address in httpd mode + + HTTP authentication: specify username and/or password, the + latter by at least some means which doesn't involve it showing + up in "ps" + + - more flexible running modes + + combined scan+dump mode which doesn't even generate an index + file (nearly indistinguishable from find(1)) + + load mode which reads a dump from standard input and builds + the index (need to nail down a perfectly general dump format) + + at least some ability to chain actions within the same run: + "agedu -s dirname -w" would seem handy. 
- work out what to do about atimes on directories in the absence of the Linux syscall magic @@ -28,9 +31,43 @@ Before it's non-embarrassingly releasable: hackery in du.c and turning it into a more sensible run-time abstraction layer. - - make a final decision on the name! + - polish the plain-text output to make it look more like du + + configurable recursive output depth + + show the right bits last + + - figure out what to do about scans starting in the root directory + + Currently we end up with a double leading slash on the + pathnames, which is ugly, and we also get a zero-length href + in between those slashes which means the web interface doesn't + let you click back up to the top level at all. + + One big problem here is that a lot of the code assumes that + you can find the extent of a pathname by searching for "foo" + and "foo^A", trusting that anything inside the directory will + begin "foo/". So I'd need to consistently fix this everywhere + so that a trailing slash is disregarded while doing it, but + not actually removed. + + The text output gets it all wrong. + + The HTML output is fiddly even at the design stage: where + would I _ideally_ put the link to click on to get back to /? + It's unclear! + + - cross-Unix portability: + + use autoconf + * configure use of stat64 + * configure use of /proc/net/tcp + * configure use of /dev/random + * configure use of Linux syscall magic replacing readdir + + later glibcs have fdopendir, hooray! So we can use that + too, if it's available and O_NOATIME is too. + * what do we do elsewhere about _GNU_SOURCE? + + - prepare a little in advance for a potential future Windows port: + + store the separator character in the index file when writing + it, and be prepared to cope on reading if it isn't a slash + + store literal byte sizes in all the size fields, instead of + Unixoid 512-byte sectors - - man page, licence, online help. + - man page, licence. 
Future directions: @@ -45,62 +82,40 @@ Future directions: * All the same options should have their starting states configurable on the command line too. - - polish the plain-text output: - + do the same formatting as in HTML, by showing files as a - single unit and also sorting by size? (Probably the other way - up, due to scrolling.) - + configurable recursive output depth - - curses-ish equivalent of the web output + try using xterm 256-colour mode. Can (n)curses handle that? If not, try doing it manually. + + I think my current best idea is to bypass ncurses and go + straight to terminfo: generate lines of attribute-interleaved + text and display them, so we only really need the sequences + "go here and display stuff", "scroll up", "scroll down". + + I think the attribute-interleaved text might be possible to do + cunningly, as well: we autodetect a basically VT-style + terminal, and add 256-colour sequences on the end. So, for + instance, we might set ANSI-yellow foreground, set ANSI-red + background, _then_ set both foreground and background to the + appropriate xterm 256-colour, and then display some + appropriate character which would have given the right blend + of the ANSI-16 fore and background colours. Then the same + display code should gracefully degrade in the face of a + terminal which doesn't support xterm-256. + * current best plan is to simulate the xterm-256 shading from + 0/5 to 5/5 by doing space, colon and hash in colour A on + colour B background, then hash, colon and space in B on A + background. + + Infrastructure work before doing any of this would be to split + html.c into two: one part to prepare an abstract data + structure describing an HTML-like report (in particular, all + the index lookups, percentage calculation, vector arithmetic + and line sorting), and another part to generate the literal + HTML. Then the former can be reused to produce very similar + reports in coloured plain text. 
- - cross-module: - + figure out what to do about scans starting in the root - directory! - * Currently we end up with a double leading slash on the - pathnames, which is ugly, and we also get a zero-length - href in between those slashes which means the web interface - doesn't let you click back up to the top level at all. - * One big problem here is that a lot of the code assumes that - you can find the extent of a pathname by searching for - "foo" and "foo^A", trusting that anything inside the - directory will begin "foo/". So I'd need to consistently - fix this everywhere so that a trailing slash is disregarded - while doing it, but not actually removed. - * The text output gets it all wrong. - * The HTML output is fiddly even at the design stage: where - would I _ideally_ put the link to click on to get back to - /? It's unclear! - - - more flexible running modes - + decouple the disk scan from the index building code, so that - the former can optionally output in the same format as --dump - and the latter can optionally work from input on stdin (having - also fixed the --dump format in the process so it's perfectly - general). Then we could scan on one machine and transfer the - results over the net to another machine where they'd be - indexed; in particular, this way the indexing machine could be - 64-bit even if the machine owning the filesystems was only 32. - + in the other direction, ability to build a database _and_ - immediately run one of the ongoing interactive report modes - (httpd, curses) in a single invocation would seem handy. - - - portability - + between Unices: - * autoconf? - * configure use of stat64 - * configure use of /proc/net/tcp - * configure use of /dev/random - * configure use of Linux syscall magic replacing readdir - + later glibcs have fdopendir, hooray! So we can use that - too, if it's available and O_NOATIME is too. - * what do we do elsewhere about _GNU_SOURCE? 
+ http://msdn.microsoft.com/en-us/library/ms724290.aspx suggest - modern Windowses support atime-equivalents, so a Windows port - is possible in principle. Would need to modify the current - structure a lot, to abstract away (at least) memory-mapping of - files, details of disk scan procedure, networking for httpd, - the path separator character (yuck). Unclear what the right UI - would be on Windows, too; command-line exactly as now might be - considered just a _little_ unfriendly. Or perhaps not. + - http://msdn.microsoft.com/en-us/library/ms724290.aspx suggest + modern Windowses support atime-equivalents, so a Windows port is + possible in principle. Would need to modify the current structure + a lot, to abstract away (at least) memory-mapping of files, + details of disk scan procedure, networking for httpd. Unclear + what the right UI would be on Windows, too; command-line exactly + as now might be considered just a _little_ unfriendly. Or perhaps + not. diff --git a/agedu.c b/agedu.c index a5de3fe..607d425 100644 --- a/agedu.c +++ b/agedu.c @@ -271,6 +271,8 @@ static void text_query(const void *mappedfile, const char *querydir, HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \ VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \ HELPARG("age") HELPOPT("[--text] include only files older than this") \ + VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \ + HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \ VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \ LONG(server_auth) LONG(web_auth) \ HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \ @@ -349,6 +351,45 @@ static void usage(FILE *fp) #undef HELPOPT } +static time_t parse_age(time_t now, const char *agestr) +{ /* Parse "<number><unit>", unit being one of d,w,m,y in either case, and return the time that far before 'now'; prints an error and exits with status 1 if agestr does not match. */ + time_t t; + struct tm tm; + int nunits; + char unit[2]; + + t = now; + + if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) { + fprintf(stderr, "%s: age specification should be a number 
followed by" + " one of d,w,m,y\n", PNAME); + exit(1); + } + + if (unit[0] == 'd' || unit[0] == 'D') { + t -= 86400 * nunits; + } else if (unit[0] == 'w' || unit[0] == 'W') { + t -= 86400 * 7 * nunits; + } else { /* months or years: step back in calendar units via struct tm */ + int ym; + + tm = *localtime(&t); + ym = tm.tm_year * 12 + tm.tm_mon; + + if (unit[0] == 'm' || unit[0] == 'M') + ym -= nunits; + else + ym -= 12 * nunits; + + tm.tm_year = ym / 12; + tm.tm_mon = ym % 12; + + t = mktime(&tm); + } + + return t; +} + int main(int argc, char **argv) { int fd, count; @@ -364,7 +405,9 @@ int main(int argc, char **argv) char *querydir = NULL; int doing_opts = 1; enum { USAGE, TEXT, HTML, SCAN, DUMP, HTTPD } mode = USAGE; - char *minage = "0d"; + time_t now = time(NULL); + time_t textcutoff = now, htmlnewest = now, htmloldest = now; + int htmlautoagerange = 1; int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; int progress = 1; struct inclusion_exclusion *inex = NULL; @@ -551,7 +594,19 @@ int main(int argc, char **argv) filename = optval; break; case OPT_MINAGE: - minage = optval; + textcutoff = parse_age(now, optval); + break; + case OPT_AGERANGE: + if (!strcmp(optval, "auto")) { + htmlautoagerange = 1; + } else { + char *q = optval + strcspn(optval, "-:"); + if (*q) + *q++ = '\0'; + htmloldest = parse_age(now, optval); + htmlnewest = *q ? 
parse_age(now, q) : now; + htmlautoagerange = 0; + } break; case OPT_AUTH: if (!strcmp(optval, "magic")) @@ -562,7 +617,20 @@ int main(int argc, char **argv) auth = HTTPD_AUTH_NONE; else if (!strcmp(optval, "default")) auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; - else { + else if (!strcmp(optval, "help") || + !strcmp(optval, "list")) { + printf("agedu: supported HTTP authentication types" + " are:\n" + " magic use Linux /proc/net/tcp to" + " determine owner of peer socket\n" + " basic HTTP Basic username and" + " password authentication\n" + " default use 'magic' if possible, " + " otherwise fall back to 'basic'\n" + " none unauthenticated HTTP (if" + " the data file is non-confidential)\n"); + return 0; + } else { fprintf(stderr, "%s: unrecognised authentication" " type '%s'\n%*s options are 'magic'," " 'basic', 'none', 'default'\n", @@ -703,41 +771,8 @@ int main(int argc, char **argv) close(fd); printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); } else if (mode == TEXT) { - time_t t; - struct tm tm; - int nunits; - char unit[2]; size_t pathlen; - t = time(NULL); - - if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) { - fprintf(stderr, "%s: minimum age should be a number followed by" - " one of d,w,m,y\n", PNAME); - return 1; - } - - if (unit[0] == 'd') { - t -= 86400 * nunits; - } else if (unit[0] == 'w') { - t -= 86400 * 7 * nunits; - } else { - int ym; - - tm = *localtime(&t); - ym = tm.tm_year * 12 + tm.tm_mon; - - if (unit[0] == 'm') - ym -= nunits; - else - ym -= 12 * nunits; - - tm.tm_year = ym / 12; - tm.tm_mon = ym % 12; - - t = mktime(&tm); - } - fd = open(filename, O_RDONLY); if (fd < 0) { fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, @@ -762,9 +797,10 @@ int main(int argc, char **argv) if (pathlen > 0 && querydir[pathlen-1] == '/') querydir[--pathlen] = '\0'; - text_query(mappedfile, querydir, t, 1); + text_query(mappedfile, querydir, textcutoff, 1); } else if (mode == HTML) { size_t pathlen; + struct html_config cfg; 
unsigned long xi; char *html; @@ -793,7 +829,11 @@ int main(int argc, char **argv) querydir[--pathlen] = '\0'; xi = trie_before(mappedfile, querydir); - html = html_query(mappedfile, xi, NULL); + cfg.format = NULL; + cfg.autoage = htmlautoagerange; + cfg.oldest = htmloldest; + cfg.newest = htmlnewest; + html = html_query(mappedfile, xi, &cfg); fputs(html, stdout); } else if (mode == DUMP) { size_t maxpathlen; @@ -825,6 +865,8 @@ int main(int argc, char **argv) } triewalk_free(tw); } else if (mode == HTTPD) { + struct html_config cfg; + fd = open(filename, O_RDONLY); if (fd < 0) { fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, @@ -842,7 +884,11 @@ int main(int argc, char **argv) return 1; } - run_httpd(mappedfile, auth); + cfg.format = NULL; + cfg.autoage = htmlautoagerange; + cfg.oldest = htmloldest; + cfg.newest = htmlnewest; + run_httpd(mappedfile, auth, &cfg); } return 0; diff --git a/html.c b/html.c index 0a57cb1..47e4372 100644 --- a/html.c +++ b/html.c @@ -385,7 +385,8 @@ static void write_report_line(struct html *ctx, struct vector *vec) htprintf(ctx, "\n\n"); } -char *html_query(const void *t, unsigned long index, const char *format) +char *html_query(const void *t, unsigned long index, + const struct html_config *cfg) { struct html actx, *ctx = &actx; char *path, *path2, *p, *q, *href; @@ -403,13 +404,13 @@ char *html_query(const void *t, unsigned long index, const char *format) ctx->buf = NULL; ctx->buflen = ctx->bufsize = 0; ctx->t = t; - ctx->format = format; + ctx->format = cfg->format; htprintf(ctx, "\n"); path = snewn(1+trie_maxpathlen(t), char); ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char); - if (format) { - hreflen = strlen(format) + 100; + if (cfg->format) { + hreflen = strlen(cfg->format) + 100; href = snewn(hreflen, char); } else { hreflen = 0; @@ -450,8 +451,8 @@ char *html_query(const void *t, unsigned long index, const char *format) *p = '\0'; index2 = trie_before(t, path); trie_getpath(t, index2, path2); - if (!strcmp(path, 
path2) && format) { - snprintf(href, hreflen, format, index2); + if (!strcmp(path, path2) && cfg->format) { + snprintf(href, hreflen, cfg->format, index2); htprintf(ctx, "", href); doing_href = 1; } @@ -469,11 +470,18 @@ char *html_query(const void *t, unsigned long index, const char *format) * Decide on the age limit of our colour coding, establish the * colour thresholds, and write out a key. */ - ctx->oldest = index_order_stat(t, 0.05); /* FIXME: configurability? */ - ctx->newest = index_order_stat(t, 1.0); ctx->now = time(NULL); - ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, -1); - ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, +1); + if (cfg->autoage) { + ctx->oldest = index_order_stat(t, 0.05); + ctx->newest = index_order_stat(t, 1.0); + ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, -1); + ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, +1); + } else { + ctx->oldest = cfg->oldest; + ctx->newest = cfg->newest; + ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, 0); + ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, 0); + } for (i = 0; i < MAXCOLOUR-1; i++) { ctx->thresholds[i] = ctx->oldest + (ctx->newest - ctx->oldest) * i / MAXCOLOUR; diff --git a/html.h b/html.h index a02babf..63aacb9 100644 --- a/html.h +++ b/html.h @@ -2,16 +2,32 @@ * html.h: HTML output format for agedu. */ +struct html_config { + /* + * If "format" is non-NULL, it is treated as an sprintf format + * string which must contain exactly one %lu and no other + * formatting directives (other than %%, which doesn't count); + * this will be used to construct URLs to use in hrefs + * pointing to queries of other related (parent and child) + * pathnames. + */ + const char *format; + + /* + * Time stamps to assign to the extreme ends of the colour + * scale. If "autoage" is true, they are ignored and the time + * stamps are derived from the limits of the age data stored + * in the index. 
+ */ + int autoage; + time_t oldest, newest; +}; + /* * Generate an HTML document containing the results of a query * against the pathname at a given index. Returns a dynamically * allocated piece of memory containing the entire HTML document, * as an ordinary C zero-terminated string. - * - * If "format" is non-NULL, it is treated as an sprintf format - * string which must contain exactly one %lu and no other - * formatting directives (other than %%, which doesn't count); - * this will be used to construct URLs to use in hrefs pointing to - * queries of other related (parent and child) pathnames. */ -char *html_query(const void *t, unsigned long index, const char *format); +char *html_query(const void *t, unsigned long index, + const struct html_config *cfg); diff --git a/httpd.c b/httpd.c index 486ca21..b08f6ef 100644 --- a/httpd.c +++ b/httpd.c @@ -103,7 +103,8 @@ static char *http_success(char *mimetype, int stuff_cr, char *document) * socket before closing it. */ char *got_data(struct connctx *ctx, char *data, int length, - int magic_access, const char *auth_string) + int magic_access, const char *auth_string, + const struct html_config *cfg) { char *line, *p, *q, *r, *z1, *z2, c1, c2; int auth_provided = 0, auth_correct = 0; @@ -288,7 +289,7 @@ char *got_data(struct connctx *ctx, char *data, int length, p = ctx->url; p += strspn(p, "/?"); index = strtoul(p, NULL, 10); - document = html_query(ctx->t, index, "%lu"); + document = html_query(ctx->t, index, cfg); if (document) { ret = http_success("text/html", 1, document); sfree(document); @@ -416,7 +417,7 @@ static void base64_encode_atom(unsigned char *data, int n, char *out) out[3] = '='; } -void run_httpd(const void *t, int authmask) +void run_httpd(const void *t, int authmask, const struct html_config *incfg) { int fd; int authtype; @@ -425,6 +426,9 @@ void run_httpd(const void *t, int authmask) struct fd *f; struct sockaddr_in addr; socklen_t addrlen; + struct html_config cfg = *incfg; + + cfg.format = 
"%lu"; /* * Establish the listening socket and retrieve its port @@ -666,7 +670,7 @@ void run_httpd(const void *t, int authmask) fds[i].wdata = got_data (fds[i].cctx, readbuf, ret, (authtype == HTTPD_AUTH_NONE || - fds[i].magic_access), authstring); + fds[i].magic_access), authstring, &cfg); if (fds[i].wdata) { fds[i].wdatalen = strlen(fds[i].wdata); fds[i].wdatapos = 0; diff --git a/httpd.h b/httpd.h index c2ffda8..467803f 100644 --- a/httpd.h +++ b/httpd.h @@ -7,4 +7,4 @@ #define HTTPD_AUTH_BASIC 2 #define HTTPD_AUTH_NONE 4 -void run_httpd(const void *t, int authmask); +void run_httpd(const void *t, int authmask, const struct html_config *cfg); -- 2.11.0