--- /dev/null
+# Makefile for umlwrap.
+
+prefix = /usr/local
+libdir = $(prefix)/lib
+ourlibdir = $(prefix)/lib/umlwrap
+bindir = $(prefix)/bin
+mandir = $(prefix)/man
+man1dir = $(mandir)/man1
+
+INSTALL = install
+
+CFLAGS = -Wall --std=c99 -pedantic $(XFLAGS)
+
+-include Makefile.local
+
+AGEDU_MODULES := agedu du malloc trie index html httpd
+AGEDU_OBJS := $(patsubst %,%.o,$(AGEDU_MODULES))
+
+ALLMODULES := $(sort $(AGEDU_MODULES))
+ALLOBJS := $(patsubst %,%.o,$(ALLMODULES))
+ALLDEPS := $(patsubst %,%.d,$(ALLMODULES))
+
+binaries: agedu
+
+agedu: $(AGEDU_OBJS)
+ gcc $(LFLAGS) -o agedu $(AGEDU_OBJS)
+
+INTERNALFLAGS=#
+
+$(ALLOBJS): %.o: %.c
+ gcc $(CFLAGS) -MM $*.c > $*.d
+ gcc $(CFLAGS) $(INTERNALFLAGS) -c $*.c
+
+install: agedu
+ mkdir -p $(bindir)
+ $(INSTALL) -m 0755 agedu $(bindir)/agedu
+
+clean:
+ rm -f agedu $(ALLOBJS) $(ALLDEPS)
+
+-include $(ALLDEPS)
--- /dev/null
+TODO list for agedu
+===================
+
+Before it's non-embarrassingly releasable:
+
+ - render HTTP access control more sane.
+ * we should have the configurable option to use HTTP Basic
+ authentication or Linux magic /proc/net/tcp
+ * a third option, and the default one, should be to _try_ to use
+ magic auth, and fall back to HTTP Basic if unavailable
+
+ - sort out the command line syntax
+ * I think there should be a unified --mode / -M for every
+ running mode, possibly without the one-letter option for the
+ diagnostic sorts of things
+ * there should be some configurable options:
+ + range limits on the age display
+ + server address in httpd mode
+
+ - do some configurability for the disk scan
+ * wildcard-based includes and excludes
+ + wildcards can act on the last pathname component or the
+ whole lot
+ + include and exclude can be interleaved; implicit "include
+ *" before any
+ * reinstate filesystem crossing, though not doing so should
+ remain the default
+
+ - polish up disk-scan progress reporting
+ * by default it should be conditional on isatty(2)
+ * manual override to enable or disable
+ * we should find rather than guessing the terminal width
+
+ - work out what to do about atimes on directories
+ * one option is to read them during the scan and reinstate them
+ after each recursion pop. Race-condition prone.
+ * marking them in a distinctive colour in the reports is the
+ other option.
+
+ - make a final decision on the name!
+
+Future directions:
+
+ - run-time configuration in the HTTP server
+ * I think this probably works by having a configuration form, or
+ a link pointing to one, somewhere on the report page. If you
+ want to reconfigure anything, you fill in and submit the form;
+ the web server receives HTTP GET with parameters and a
+ referer, adjusts its internal configuration, and returns an
+ HTTP redirect back to the referring page - which it then
+ re-renders in accordance with the change.
+ * All the same options should have their starting states
+ configurable on the command line too.
+
+ - polish the plain-text output:
+ + do the same formatting as in HTML, by showing files as a
+ single unit and also sorting by size? (Probably the other way
+ up, due to scrolling.)
+ + configurable recursive output depth
+
+ - curses-ish equivalent of the web output
+ + try using xterm 256-colour mode. Can (n)curses handle that? If
+ not, try doing it manually.
+
+ - cross-module:
+ + figure out what to do about scans starting in the root
+ directory!
+ * Currently we end up with a double leading slash on the
+ pathnames, which is ugly, and we also get a zero-length
+ href in between those slashes which means the web interface
+ doesn't let you click back up to the top level at all.
+ * One big problem here is that a lot of the code assumes that
+ you can find the extent of a pathname by searching for
+ "foo" and "foo^A", trusting that anything inside the
+ directory will begin "foo/". So I'd need to consistently
+ fix this everywhere so that a trailing slash is disregarded
+ while doing it, but not actually removed.
+ * The text output gets it all wrong.
+ * The HTML output is fiddly even at the design stage: where
+ would I _ideally_ put the link to click on to get back to
+ /? It's unclear!
+
+ - more flexible running modes
+ + decouple the disk scan from the index building code, so that
+ the former can optionally output in the same format as --dump
+ and the latter can optionally work from input on stdin (having
+ also fixed the --dump format in the process so it's perfectly
+ general). Then we could scan on one machine and transfer the
+ results over the net to another machine where they'd be
+ indexed; in particular, this way the indexing machine could be
+ 64-bit even if the machine owning the filesystems was only 32.
+ + ability to build a database _and_ immediately run one of the
+ ongoing interactive report modes (httpd, curses) would seem
+ handy.
+
+ - portability
+ + between Unices:
+ * autoconf?
+ * configure use of stat64
+ * configure use of /proc/net/tcp
+ * what do we do elsewhere about _GNU_SOURCE?
+ + further afield: is there in fact any non-Unix OS that supports
+ atimes and hence can be used with agedu at all?
+ * yes! http://msdn.microsoft.com/en-us/library/ms724290.aspx
--- /dev/null
+/*
+ * Main program for agedu.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "du.h"
+#include "trie.h"
+#include "index.h"
+#include "malloc.h"
+#include "html.h"
+#include "httpd.h"
+
+#define PNAME "agedu"
+
+void fatal(const char *fmt, ...)
+{
+ va_list ap;
+ fprintf(stderr, "%s: ", PNAME);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+struct ctx {
+ triebuild *tb;
+ dev_t datafile_dev, filesystem_dev;
+ ino_t datafile_ino;
+ time_t last_output_update;
+};
+
+static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
+{
+ struct ctx *ctx = (struct ctx *)vctx;
+ struct trie_file file;
+ time_t t;
+
+ /*
+ * Filter out our own data file.
+ */
+ if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
+ return 0;
+
+ /*
+ * Don't cross the streams^W^Wany file system boundary.
+ * (FIXME: this should be a configurable option.)
+ */
+ if (st->st_dev != ctx->filesystem_dev)
+ return 0;
+
+ /*
+ * FIXME: other filtering in gotdata will be needed, when we
+ * implement serious filtering.
+ */
+
+ file.blocks = st->st_blocks;
+ file.atime = st->st_atime;
+ triebuild_add(ctx->tb, pathname, &file);
+
+ t = time(NULL);
+ if (t != ctx->last_output_update) {
+ fprintf(stderr, "%-79.79s\r", pathname);
+ fflush(stderr);
+ ctx->last_output_update = t;
+ }
+
+ return 1;
+}
+
+static void run_query(const void *mappedfile, const char *rootdir,
+ time_t t, int depth)
+{
+ size_t maxpathlen;
+ char *pathbuf;
+ unsigned long xi1, xi2;
+ unsigned long long s1, s2;
+
+ maxpathlen = trie_maxpathlen(mappedfile);
+ pathbuf = snewn(maxpathlen + 1, char);
+
+ /*
+ * We want to query everything between the supplied filename
+ * (inclusive) and that filename with a ^A on the end
+ * (exclusive). So find the x indices for each.
+ */
+ sprintf(pathbuf, "%s\001", rootdir);
+ xi1 = trie_before(mappedfile, rootdir);
+ xi2 = trie_before(mappedfile, pathbuf);
+
+ /*
+ * Now do the lookups in the age index.
+ */
+ s1 = index_query(mappedfile, xi1, t);
+ s2 = index_query(mappedfile, xi2, t);
+
+ /* Display in units of 2 512-byte blocks = 1Kb */
+ printf("%-11llu %s\n", (s2 - s1) / 2, rootdir);
+
+ if (depth > 0) {
+ /*
+ * Now scan for first-level subdirectories and report
+ * those too.
+ */
+ xi1++;
+ while (xi1 < xi2) {
+ trie_getpath(mappedfile, xi1, pathbuf);
+ run_query(mappedfile, pathbuf, t, depth-1);
+ strcat(pathbuf, "\001");
+ xi1 = trie_before(mappedfile, pathbuf);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int fd, count;
+ struct ctx actx, *ctx = &actx;
+ struct stat st;
+ off_t totalsize, realsize;
+ void *mappedfile;
+ triewalk *tw;
+ indexbuild *ib;
+ const struct trie_file *tf;
+ char *filename = "agedu.dat";
+ char *rootdir = NULL;
+ int doing_opts = 1;
+ enum { QUERY, HTML, SCAN, DUMP, HTTPD } mode = QUERY;
+ char *minage = "0d";
+
+ while (--argc > 0) {
+ char *p = *++argv;
+ char *optval;
+
+ if (doing_opts && *p == '-') {
+ if (!strcmp(p, "--")) {
+ doing_opts = 0;
+ } else if (p[1] == '-') {
+ char *optval = strchr(p, '=');
+ if (optval)
+ *optval++ = '\0';
+ if (!strcmp(p, "--help")) {
+ printf("FIXME: usage();\n");
+ return 0;
+ } else if (!strcmp(p, "--version")) {
+ printf("FIXME: version();\n");
+ return 0;
+ } else if (!strcmp(p, "--licence") ||
+ !strcmp(p, "--license")) {
+ printf("FIXME: licence();\n");
+ return 0;
+ } else if (!strcmp(p, "--scan")) {
+ mode = SCAN;
+ } else if (!strcmp(p, "--dump")) {
+ mode = DUMP;
+ } else if (!strcmp(p, "--html")) {
+ mode = HTML;
+ } else if (!strcmp(p, "--httpd") ||
+ !strcmp(p, "--server")) {
+ mode = HTTPD;
+ } else if (!strcmp(p, "--file") ||
+ !strcmp(p, "--minimum-age") ||
+ !strcmp(p, "--min-age") ||
+ !strcmp(p, "--age")) {
+ /*
+ * Long options requiring values.
+ */
+ if (!optval) {
+ if (--argc > 0) {
+ optval = *++argv;
+ } else {
+ fprintf(stderr, "%s: option '%s' requires"
+ " an argument\n", PNAME, p);
+ return 1;
+ }
+ }
+ if (!strcmp(p, "--file")) {
+ filename = optval;
+ } else if (!strcmp(p, "--minimum-age") ||
+ !strcmp(p, "--min-age") ||
+ !strcmp(p, "--age")) {
+ minage = optval;
+ }
+ } else {
+ fprintf(stderr, "%s: unrecognised option '%s'\n",
+ PNAME, p);
+ return 1;
+ }
+ } else {
+ p++;
+ while (*p) {
+ char c = *p++;
+
+ switch (c) {
+ /* Options requiring arguments. */
+ case 'f':
+ case 'a':
+ if (*p) {
+ optval = p;
+ p += strlen(p);
+ } else if (--argc > 0) {
+ optval = *++argv;
+ } else {
+ fprintf(stderr, "%s: option '-%c' requires"
+ " an argument\n", PNAME, c);
+ return 1;
+ }
+ switch (c) {
+ case 'f': /* data file name */
+ filename = optval;
+ break;
+ case 'a': /* maximum age */
+ minage = optval;
+ break;
+ }
+ break;
+ case 's':
+ mode = SCAN;
+ break;
+ default:
+ fprintf(stderr, "%s: unrecognised option '-%c'\n",
+ PNAME, c);
+ return 1;
+ }
+ }
+ }
+ } else {
+ if (!rootdir) {
+ rootdir = p;
+ } else {
+ fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
+ return 1;
+ }
+ }
+ }
+
+ if (!rootdir)
+ rootdir = ".";
+
+ if (mode == SCAN) {
+
+ fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
+ if (fd < 0) {
+ fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
+ strerror(errno));
+ return 1;
+ }
+
+ if (stat(rootdir, &st) < 0) {
+ fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir,
+ strerror(errno));
+ return 1;
+ }
+ ctx->filesystem_dev = st.st_dev;
+
+ if (fstat(fd, &st) < 0) {
+ perror("agedu: fstat");
+ return 1;
+ }
+ ctx->datafile_dev = st.st_dev;
+ ctx->datafile_ino = st.st_ino;
+
+ ctx->last_output_update = time(NULL);
+
+ /*
+ * Scan the directory tree, and write out the trie component
+ * of the data file.
+ */
+ ctx->tb = triebuild_new(fd);
+ du(rootdir, gotdata, ctx);
+ count = triebuild_finish(ctx->tb);
+ triebuild_free(ctx->tb);
+
+ fprintf(stderr, "%-79s\r", "");
+ fflush(stderr);
+
+ /*
+ * Work out how much space the cumulative index trees will
+ * take; enlarge the file, and memory-map it.
+ */
+ if (fstat(fd, &st) < 0) {
+ perror("agedu: fstat");
+ return 1;
+ }
+
+ printf("Built pathname index, %d entries, %ju bytes\n", count,
+ (intmax_t)st.st_size);
+
+ totalsize = index_compute_size(st.st_size, count);
+
+ if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
+ perror("agedu: lseek");
+ return 1;
+ }
+ if (write(fd, "\0", 1) < 1) {
+ perror("agedu: write");
+ return 1;
+ }
+
+ printf("Upper bound on index file size = %ju bytes\n",
+ (intmax_t)totalsize);
+
+ mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
+ if (!mappedfile) {
+ perror("agedu: mmap");
+ return 1;
+ }
+
+ ib = indexbuild_new(mappedfile, st.st_size, count);
+ tw = triewalk_new(mappedfile);
+ while ((tf = triewalk_next(tw, NULL)) != NULL)
+ indexbuild_add(ib, tf);
+ triewalk_free(tw);
+ realsize = indexbuild_realsize(ib);
+ indexbuild_free(ib);
+
+ munmap(mappedfile, totalsize);
+ ftruncate(fd, realsize);
+ close(fd);
+ printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
+ } else if (mode == QUERY) {
+ time_t t;
+ struct tm tm;
+ int nunits;
+ char unit[2];
+ size_t pathlen;
+
+ t = time(NULL);
+
+ if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) {
+ fprintf(stderr, "%s: minimum age should be a number followed by"
+ " one of d,w,m,y\n", PNAME);
+ return 1;
+ }
+
+ if (unit[0] == 'd') {
+ t -= 86400 * nunits;
+ } else if (unit[0] == 'w') {
+ t -= 86400 * 7 * nunits;
+ } else {
+ int ym;
+
+ tm = *localtime(&t);
+ ym = tm.tm_year * 12 + tm.tm_mon;
+
+ if (unit[0] == 'm')
+ ym -= nunits;
+ else
+ ym -= 12 * nunits;
+
+ tm.tm_year = ym / 12;
+ tm.tm_mon = ym % 12;
+
+ t = mktime(&tm);
+ }
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
+ strerror(errno));
+ return 1;
+ }
+ if (fstat(fd, &st) < 0) {
+ perror("agedu: fstat");
+ return 1;
+ }
+ totalsize = st.st_size;
+ mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
+ if (!mappedfile) {
+ perror("agedu: mmap");
+ return 1;
+ }
+
+ /*
+ * Trim trailing slash, just in case.
+ */
+ pathlen = strlen(rootdir);
+ if (pathlen > 0 && rootdir[pathlen-1] == '/')
+ rootdir[--pathlen] = '\0';
+
+ run_query(mappedfile, rootdir, t, 1);
+ } else if (mode == HTML) {
+ size_t pathlen;
+ unsigned long xi;
+ char *html;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
+ strerror(errno));
+ return 1;
+ }
+ if (fstat(fd, &st) < 0) {
+ perror("agedu: fstat");
+ return 1;
+ }
+ totalsize = st.st_size;
+ mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
+ if (!mappedfile) {
+ perror("agedu: mmap");
+ return 1;
+ }
+
+ /*
+ * Trim trailing slash, just in case.
+ */
+ pathlen = strlen(rootdir);
+ if (pathlen > 0 && rootdir[pathlen-1] == '/')
+ rootdir[--pathlen] = '\0';
+
+ xi = trie_before(mappedfile, rootdir);
+ html = html_query(mappedfile, xi, NULL);
+ fputs(html, stdout);
+ } else if (mode == DUMP) {
+ size_t maxpathlen;
+ char *buf;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
+ strerror(errno));
+ return 1;
+ }
+ if (fstat(fd, &st) < 0) {
+ perror("agedu: fstat");
+ return 1;
+ }
+ totalsize = st.st_size;
+ mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
+ if (!mappedfile) {
+ perror("agedu: mmap");
+ return 1;
+ }
+
+ maxpathlen = trie_maxpathlen(mappedfile);
+ buf = snewn(maxpathlen, char);
+
+ tw = triewalk_new(mappedfile);
+ while ((tf = triewalk_next(tw, buf)) != NULL) {
+ printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime);
+ }
+ triewalk_free(tw);
+ } else if (mode == HTTPD) {
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
+ strerror(errno));
+ return 1;
+ }
+ if (fstat(fd, &st) < 0) {
+ perror("agedu: fstat");
+ return 1;
+ }
+ totalsize = st.st_size;
+ mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
+ if (!mappedfile) {
+ perror("agedu: mmap");
+ return 1;
+ }
+
+ run_httpd(mappedfile);
+ }
+
+ return 0;
+}
--- /dev/null
+/*
+ * du.c: implementation of du.h.
+ */
+
+#define _GNU_SOURCE
+#include <features.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "du.h"
+#include "malloc.h"
+
+#ifdef __linux__
+#define __KERNEL__
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <linux/dirent.h>
+#include <linux/unistd.h>
+typedef int dirhandle;
+typedef struct {
+ struct dirent data[32];
+ struct dirent *curr;
+ int pos, endpos;
+} direntry;
+_syscall3(int, getdents, uint, fd, struct dirent *, dirp, uint, count)
+#define OPENDIR(f) open(f, O_RDONLY | O_NOATIME | O_DIRECTORY)
+#define DIRVALID(dh) ((dh) >= 0)
+#define READDIR(dh,de) ((de).curr = (de).data, (de).pos = 0, \
+ ((de).endpos = getdents((dh), (de).data, sizeof((de).data))) > 0)
+#define DENAME(de) ((de).curr->d_name)
+#define DEDONE(de) ((de).pos >= (de).endpos)
+#define DEADVANCE(de) ((de).pos += (de).curr->d_reclen, \
+ (de).curr = (struct dirent *)((char *)(de).data + (de).pos))
+#define CLOSEDIR(dh) close(dh)
+#else
+#include <dirent.h>
+typedef DIR *dirhandle;
+typedef struct dirent *direntry;
+#define OPENDIR(f) opendir(f)
+#define DIRVALID(dh) ((dh) != NULL)
+#define READDIR(dh,de) (((de) = readdir(dh)) ? 1 : 0)
+#define DENAME(de) ((de)->d_name)
+#define DEDONE(de) ((de) == NULL)
+#define DEADVANCE(de) ((de) = NULL)
+#define CLOSEDIR(dh) closedir(dh)
+#endif
+
+static int str_cmp(const void *av, const void *bv)
+{
+ return strcmp(*(const char **)av, *(const char **)bv);
+}
+
+static void du_recurse(char **path, size_t pathlen, size_t *pathsize,
+ gotdata_fn_t gotdata, void *gotdata_ctx)
+{
+ direntry de;
+ dirhandle d;
+ struct stat64 st;
+ char **names;
+ size_t i, nnames, namesize;
+
+ if (lstat64(*path, &st) < 0) {
+ fprintf(stderr, "%s: lstat: %s\n", *path, strerror(errno));
+ return;
+ }
+
+ if (!gotdata(gotdata_ctx, *path, &st))
+ return;
+
+ if (!S_ISDIR(st.st_mode))
+ return;
+
+ names = NULL;
+ nnames = namesize = 0;
+
+ d = OPENDIR(*path);
+ if (!DIRVALID(d)) {
+ fprintf(stderr, "%s: opendir: %s\n", *path, strerror(errno));
+ return;
+ }
+ while (READDIR(d, de)) {
+ do {
+ const char *name = DENAME(de);
+ if (name[0] == '.' && (!name[1] || (name[1] == '.' && !name[2]))) {
+ /* do nothing - we skip "." and ".." */
+ } else {
+ if (nnames >= namesize) {
+ namesize = nnames * 3 / 2 + 64;
+ names = sresize(names, namesize, char *);
+ }
+ names[nnames++] = dupstr(name);
+ }
+ DEADVANCE(de);
+ } while (!DEDONE(de));
+ }
+ CLOSEDIR(d);
+
+ if (nnames == 0)
+ return;
+
+ qsort(names, nnames, sizeof(*names), str_cmp);
+
+ for (i = 0; i < nnames; i++) {
+ size_t newpathlen = pathlen + 1 + strlen(names[i]);
+ if (*pathsize <= newpathlen) {
+ *pathsize = newpathlen * 3 / 2 + 256;
+ *path = sresize(*path, *pathsize, char);
+ }
+ sprintf(*path + pathlen, "/%s", names[i]);
+
+ du_recurse(path, newpathlen, pathsize, gotdata, gotdata_ctx);
+
+ sfree(names[i]);
+ }
+ sfree(names);
+}
+
+void du(char *inpath, gotdata_fn_t gotdata, void *gotdata_ctx)
+{
+ char *path;
+ size_t pathlen, pathsize;
+
+ pathlen = strlen(inpath);
+ pathsize = pathlen + 256;
+ path = snewn(pathsize, char);
+ strcpy(path, inpath);
+
+ du_recurse(&path, pathlen, &pathsize, gotdata, gotdata_ctx);
+}
--- /dev/null
+/*
+ * du.h: the function which actually performs the disk scan.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/*
+ * Function called to report a file or directory, its size and its
+ * last-access time.
+ *
+ * Returns non-zero if recursion should proceed into this file's
+ * contents (if it's a directory); zero if it should not. If the
+ * file isn't a directory, the return value is ignored.
+ */
+typedef int (*gotdata_fn_t)(void *ctx,
+ const char *pathname,
+ const struct stat64 *st);
+
+/*
+ * Recursively scan a directory tree and report every
+ * space-consuming item in it to gotdata().
+ */
+void du(char *path, gotdata_fn_t gotdata, void *gotdata_ctx);
--- /dev/null
+/*
+ * html.c: implementation of html.h.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#include "html.h"
+#include "malloc.h"
+#include "trie.h"
+#include "index.h"
+
+#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
+
+#define MAXCOLOUR 511
+
+struct html {
+ char *buf;
+ size_t buflen, bufsize;
+ const void *t;
+ unsigned long long totalsize, oldest, newest;
+ char *path2;
+ char *href;
+ size_t hreflen;
+ const char *format;
+ unsigned long long thresholds[MAXCOLOUR-1];
+ time_t now;
+};
+
+static void vhtprintf(struct html *ctx, char *fmt, va_list ap)
+{
+ va_list ap2;
+ int size, size2;
+
+ va_copy(ap2, ap);
+ size = vsnprintf(NULL, 0, fmt, ap2);
+ va_end(ap2);
+
+ if (ctx->buflen + size >= ctx->bufsize) {
+ ctx->bufsize = (ctx->buflen + size) * 3 / 2 + 1024;
+ ctx->buf = sresize(ctx->buf, ctx->bufsize, char);
+ }
+ size2 = vsnprintf(ctx->buf + ctx->buflen, ctx->bufsize - ctx->buflen,
+ fmt, ap);
+ assert(size == size2);
+ ctx->buflen += size;
+}
+
+static void htprintf(struct html *ctx, char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vhtprintf(ctx, fmt, ap);
+ va_end(ap);
+}
+
+static unsigned long long round_and_format_age(struct html *ctx,
+ unsigned long long age,
+ char *buf, int direction)
+{
+ struct tm tm, tm2;
+ char newbuf[80];
+ unsigned long long ret, newret;
+ int i;
+ int ym;
+ static const int minutes[] = { 5, 10, 15, 30, 45 };
+
+ tm = *localtime(&ctx->now);
+ ym = tm.tm_year * 12 + tm.tm_mon;
+
+ ret = ctx->now;
+ strcpy(buf, "Now");
+
+ for (i = 0; i < lenof(minutes); i++) {
+ newret = ctx->now - minutes[i] * 60;
+ sprintf(newbuf, "%d minutes", minutes[i]);
+ if (newret < age)
+ goto finish;
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+
+ for (i = 1; i < 24; i++) {
+ newret = ctx->now - i * (60*60);
+ sprintf(newbuf, "%d hour%s", i, i==1 ? "" : "s");
+ if (newret < age)
+ goto finish;
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+
+ for (i = 1; i < 7; i++) {
+ newret = ctx->now - i * (24*60*60);
+ sprintf(newbuf, "%d day%s", i, i==1 ? "" : "s");
+ if (newret < age)
+ goto finish;
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+
+ for (i = 1; i < 4; i++) {
+ newret = ctx->now - i * (7*24*60*60);
+ sprintf(newbuf, "%d week%s", i, i==1 ? "" : "s");
+ if (newret < age)
+ goto finish;
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+
+ for (i = 1; i < 11; i++) {
+ tm2 = tm; /* structure copy */
+ tm2.tm_year = (ym - i) / 12;
+ tm2.tm_mon = (ym - i) % 12;
+ newret = mktime(&tm2);
+ sprintf(newbuf, "%d month%s", i, i==1 ? "" : "s");
+ if (newret < age)
+ goto finish;
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+
+ for (i = 1;; i++) {
+ tm2 = tm; /* structure copy */
+ tm2.tm_year = (ym - i*12) / 12;
+ tm2.tm_mon = (ym - i*12) % 12;
+ newret = mktime(&tm2);
+ sprintf(newbuf, "%d year%s", i, i==1 ? "" : "s");
+ if (newret < age)
+ goto finish;
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+
+ finish:
+ if (direction > 0) {
+ /*
+ * Round toward newest, i.e. use the existing (buf,ret).
+ */
+ } else if (direction < 0) {
+ /*
+ * Round toward oldest, i.e. use (newbuf,newret);
+ */
+ strcpy(buf, newbuf);
+ ret = newret;
+ } else {
+ /*
+ * Round to nearest.
+ */
+ if (ret - age > age - newret) {
+ strcpy(buf, newbuf);
+ ret = newret;
+ }
+ }
+ return ret;
+}
+
+static void get_indices(const void *t, char *path,
+ unsigned long *xi1, unsigned long *xi2)
+{
+ size_t pathlen = strlen(path);
+
+ *xi1 = trie_before(t, path);
+ path[pathlen] = '\001';
+ path[pathlen+1] = '\0';
+ *xi2 = trie_before(t, path);
+ path[pathlen] = '\0';
+}
+
+static unsigned long long fetch_size(const void *t, char *path,
+ unsigned long long atime)
+{
+ unsigned long xi1, xi2;
+
+ get_indices(t, path, &xi1, &xi2);
+
+ return index_query(t, xi2, atime) - index_query(t, xi1, atime);
+}
+
+static void htescape(struct html *ctx, const char *s, int n, int italics)
+{
+ while (n > 0 && *s) {
+ unsigned char c = (unsigned char)*s++;
+
+ if (c == '&')
+ htprintf(ctx, "&");
+ else if (c == '<')
+ htprintf(ctx, "<");
+ else if (c == '>')
+ htprintf(ctx, ">");
+ else if (c >= ' ' && c < '\177')
+ htprintf(ctx, "%c", c);
+ else {
+ if (italics) htprintf(ctx, "<i>");
+ htprintf(ctx, "[%02x]", c);
+ if (italics) htprintf(ctx, "</i>");
+ }
+
+ n--;
+ }
+}
+
+static void begin_colour_bar(struct html *ctx)
+{
+ htprintf(ctx, "<table cellspacing=0 cellpadding=0"
+ " style=\"border:0\">\n<tr>\n");
+}
+
+static void add_to_colour_bar(struct html *ctx, int colour, int pixels)
+{
+ int r, g, b;
+ char buf[80];
+
+ if (colour >= 0 && colour < 256) /* red -> yellow fade */
+ r = 255, g = colour, b = 0;
+ else if (colour >= 256 && colour <= 511) /* yellow -> green fade */
+ r = 511 - colour, g = 255, b = 0;
+ else /* background grey */
+ r = g = b = 240;
+
+ if (colour < 0) {
+ /* no title text here */
+ } else if (colour == 0) {
+ strcpy(buf, "< ");
+ round_and_format_age(ctx, ctx->thresholds[0], buf+5, 0);
+ } else if (colour == MAXCOLOUR) {
+ strcpy(buf, "> ");
+ round_and_format_age(ctx, ctx->thresholds[MAXCOLOUR-1], buf+5, 0);
+ } else {
+ unsigned long long midrange =
+ (ctx->thresholds[colour] + ctx->thresholds[colour+1]) / 2;
+ round_and_format_age(ctx, midrange, buf, 0);
+ }
+
+ if (pixels > 0) {
+ htprintf(ctx, "<td style=\"width:%dpx; height:1em; "
+ "background-color:#%02x%02x%02x\"",
+ pixels, r, g, b);
+ if (colour >= 0)
+ htprintf(ctx, " title=\"%s\"", buf);
+ htprintf(ctx, "></td>\n");
+ }
+}
+
+static void end_colour_bar(struct html *ctx)
+{
+ htprintf(ctx, "</tr>\n</table>\n");
+}
+
+struct vector {
+ int want_href;
+ char *name;
+ unsigned long index;
+ unsigned long long sizes[MAXCOLOUR+1];
+};
+
+int vec_compare(const void *av, const void *bv)
+{
+ const struct vector *a = *(const struct vector **)av;
+ const struct vector *b = *(const struct vector **)bv;
+
+ if (a->sizes[MAXCOLOUR] > b->sizes[MAXCOLOUR])
+ return -1;
+ else if (a->sizes[MAXCOLOUR] < b->sizes[MAXCOLOUR])
+ return +1;
+ else if (a->want_href < b->want_href)
+ return +1;
+ else if (a->want_href > b->want_href)
+ return -1;
+ else if (a->want_href)
+ return strcmp(a->name, b->name);
+ else if (a->index < b->index)
+ return -1;
+ else if (a->index > b->index)
+ return +1;
+ return 0;
+}
+
+static struct vector *make_vector(struct html *ctx, char *path,
+ int want_href, char *name)
+{
+ unsigned long xi1, xi2;
+ struct vector *vec = snew(struct vector);
+ int i;
+
+ vec->want_href = want_href;
+ vec->name = name ? dupstr(name) : NULL;
+
+ get_indices(ctx->t, path, &xi1, &xi2);
+
+ vec->index = xi1;
+
+ for (i = 0; i <= MAXCOLOUR; i++) {
+ unsigned long long atime;
+ if (i == MAXCOLOUR)
+ atime = ULLONG_MAX;
+ else
+ atime = ctx->thresholds[i];
+ vec->sizes[i] = fetch_size(ctx->t, path, atime);
+ }
+
+ return vec;
+}
+
+static void print_heading(struct html *ctx, const char *title)
+{
+ htprintf(ctx, "<tr style=\"padding: 0.2em; background-color:#e0e0e0\">\n"
+ "<td colspan=4 align=center>%s</td>\n</tr>\n", title);
+}
+
+#define PIXEL_SIZE 600 /* FIXME: configurability? */
+static void write_report_line(struct html *ctx, struct vector *vec)
+{
+ unsigned long long size, asize;
+ int pix, newpix;
+ int i;
+
+ /*
+ * Find the total size of this subdirectory.
+ */
+ size = vec->sizes[MAXCOLOUR];
+ htprintf(ctx, "<tr>\n"
+ "<td style=\"padding: 0.2em; text-align: right\">%lluMb</td>\n",
+ ((size + ((1<<11)-1)) >> 11)); /* convert to Mb, rounding up */
+
+ /*
+ * Generate a colour bar.
+ */
+ htprintf(ctx, "<td style=\"padding: 0.2em\">\n");
+ begin_colour_bar(ctx);
+ pix = 0;
+ for (i = 0; i <= MAXCOLOUR; i++) {
+ asize = vec->sizes[i];
+ newpix = asize * PIXEL_SIZE / ctx->totalsize;
+ add_to_colour_bar(ctx, i, newpix - pix);
+ pix = newpix;
+ }
+ add_to_colour_bar(ctx, -1, PIXEL_SIZE - pix);
+ end_colour_bar(ctx);
+ htprintf(ctx, "</td>\n");
+
+ /*
+ * Output size as a percentage of totalsize.
+ */
+ htprintf(ctx, "<td style=\"padding: 0.2em; text-align: right\">"
+ "%.2f%%</td>\n", (double)size / ctx->totalsize * 100.0);
+
+ /*
+ * Output a subdirectory marker.
+ */
+ htprintf(ctx, "<td style=\"padding: 0.2em\">");
+ if (vec->name) {
+ int doing_href = 0;
+
+ if (ctx->format && vec->want_href) {
+ snprintf(ctx->href, ctx->hreflen, ctx->format, vec->index);
+ htprintf(ctx, "<a href=\"%s\">", ctx->href);
+ doing_href = 1;
+ }
+ htescape(ctx, vec->name, strlen(vec->name), 1);
+ if (doing_href)
+ htprintf(ctx, "</a>");
+ }
+ htprintf(ctx, "</td>\n</tr>\n");
+}
+
+char *html_query(const void *t, unsigned long index, const char *format)
+{
+ struct html actx, *ctx = &actx;
+ char *path, *path2, *p, *q, *href;
+ char agebuf1[80], agebuf2[80];
+ size_t pathlen, hreflen;
+ unsigned long index2;
+ int i;
+ struct vector **vecs;
+ int nvecs, vecsize;
+ unsigned long xi1, xi2, xj1, xj2;
+
+ if (index >= trie_count(t))
+ return NULL;
+
+ ctx->buf = NULL;
+ ctx->buflen = ctx->bufsize = 0;
+ ctx->t = t;
+ ctx->format = format;
+ htprintf(ctx, "<html>\n");
+
+ path = snewn(1+trie_maxpathlen(t), char);
+ ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char);
+ if (format) {
+ hreflen = strlen(format) + 100;
+ href = snewn(hreflen, char);
+ } else {
+ hreflen = 0;
+ href = NULL;
+ }
+ ctx->hreflen = hreflen;
+ ctx->href = href;
+
+ /*
+ * HEAD section.
+ */
+ htprintf(ctx, "<head>\n");
+ trie_getpath(t, index, path);
+ htprintf(ctx, "<title>agedu: ");
+ htescape(ctx, path, strlen(path), 0);
+ htprintf(ctx, "</title>\n");
+ htprintf(ctx, "</head>\n");
+
+ /*
+ * Begin BODY section.
+ */
+ htprintf(ctx, "<body>\n");
+ htprintf(ctx, "<h3 align=center>Disk space breakdown by"
+ " last-access time</h3>\n");
+
+ /*
+ * Show the pathname we're centred on, with hyperlinks to
+ * parent directories where available.
+ */
+ htprintf(ctx, "<p align=center>\n<code>");
+ q = path;
+ for (p = strchr(path, '/'); p; p = strchr(p+1, '/')) {
+ int doing_href = 0;
+ /*
+ * See if this path prefix exists in the trie. If so,
+ * generate a hyperlink.
+ */
+ *p = '\0';
+ index2 = trie_before(t, path);
+ trie_getpath(t, index2, path2);
+ if (!strcmp(path, path2) && format) {
+ snprintf(href, hreflen, format, index2);
+ htprintf(ctx, "<a href=\"%s\">", href);
+ doing_href = 1;
+ }
+ *p = '/';
+ htescape(ctx, q, p - q, 1);
+ q = p + 1;
+ if (doing_href)
+ htprintf(ctx, "</a>");
+ htprintf(ctx, "/");
+ }
+ htescape(ctx, q, strlen(q), 1);
+ htprintf(ctx, "</code>\n");
+
+ /*
+ * Decide on the age limit of our colour coding, establish the
+ * colour thresholds, and write out a key.
+ */
+ ctx->oldest = index_order_stat(t, 0.05); /* FIXME: configurability? */
+ ctx->newest = index_order_stat(t, 1.0);
+ ctx->now = time(NULL);
+ ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, -1);
+ ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, +1);
+ for (i = 0; i < MAXCOLOUR-1; i++) {
+ ctx->thresholds[i] =
+ ctx->oldest + (ctx->newest - ctx->oldest) * i / MAXCOLOUR;
+ }
+ htprintf(ctx, "<p align=center>Key to colour coding (mouse over for more detail):\n");
+ htprintf(ctx, "<p align=center style=\"padding: 0; margin-top:0.4em; "
+ "margin-bottom:1em\"");
+ begin_colour_bar(ctx);
+ htprintf(ctx, "<td style=\"padding-right:1em\">%s</td>\n", agebuf1);
+ for (i = 0; i < MAXCOLOUR; i++)
+ add_to_colour_bar(ctx, i, 1);
+ htprintf(ctx, "<td style=\"padding-left:1em\">%s</td>\n", agebuf2);
+ end_colour_bar(ctx);
+
+ /*
+ * Begin the main table.
+ */
+ htprintf(ctx, "<p align=center>\n<table style=\"margin:0; border:0\">\n");
+
+ /*
+ * Find the total size of our entire subdirectory. We'll use
+ * that as the scale for all the colour bars in this report.
+ */
+ ctx->totalsize = fetch_size(t, path, ULLONG_MAX);
+
+ /*
+ * Generate a report line for the whole subdirectory.
+ */
+ vecsize = 64;
+ vecs = snewn(vecsize, struct vector *);
+ nvecs = 1;
+ vecs[0] = make_vector(ctx, path, 0, NULL);
+ print_heading(ctx, "Overall");
+ write_report_line(ctx, vecs[0]);
+
+ /*
+ * Now generate report lines for all its children, and the
+ * files contained in it.
+ */
+ print_heading(ctx, "Subdirectories");
+
+ vecs[0]->name = dupstr("[files]");
+ get_indices(t, path, &xi1, &xi2);
+ xi1++;
+ pathlen = strlen(path);
+ while (xi1 < xi2) {
+ trie_getpath(t, xi1, path2);
+ get_indices(t, ctx->path2, &xj1, &xj2);
+ xi1 = xj2;
+ if (xj2 - xj1 <= 1)
+ continue; /* skip individual files */
+ if (nvecs >= vecsize) {
+ vecsize = nvecs * 3 / 2 + 64;
+ vecs = sresize(vecs, vecsize, struct vector *);
+ }
+ assert(strlen(path2) > pathlen);
+ vecs[nvecs] = make_vector(ctx, path2, 1, path2 + pathlen + 1);
+ for (i = 0; i <= MAXCOLOUR; i++)
+ vecs[0]->sizes[i] -= vecs[nvecs]->sizes[i];
+ nvecs++;
+ }
+
+ qsort(vecs, nvecs, sizeof(vecs[0]), vec_compare);
+
+ for (i = 0; i < nvecs; i++)
+ write_report_line(ctx, vecs[i]);
+
+ /*
+ * Close the main table.
+ */
+ htprintf(ctx, "</table>\n");
+
+ /*
+ * Finish up and tidy up.
+ */
+ htprintf(ctx, "</body>\n");
+ htprintf(ctx, "</html>\n");
+ sfree(href);
+ sfree(path2);
+ sfree(path);
+ for (i = 0; i < nvecs; i++) {
+ sfree(vecs[i]->name);
+ sfree(vecs[i]);
+ }
+ sfree(vecs);
+
+ return ctx->buf;
+}
--- /dev/null
+/*
+ * html.h: HTML output format for agedu.
+ */
+
+/*
+ * Generate an HTML document containing the results of a query
+ * against the pathname at a given index. Returns a dynamically
+ * allocated piece of memory containing the entire HTML document,
+ * as an ordinary C zero-terminated string.
+ *
+ * If "format" is non-NULL, it is treated as an sprintf format
+ * string which must contain exactly one %lu and no other
+ * formatting directives (other than %%, which doesn't count);
+ * this will be used to construct URLs to use in hrefs pointing to
+ * queries of other related (parent and child) pathnames.
+ */
+char *html_query(const void *t, unsigned long index, const char *format);
--- /dev/null
+/*
+ * httpd.c: implementation of httpd.h.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <syslog.h>
+
+#include "malloc.h"
+#include "html.h"
+
+/* --- Logic driving what the web server's responses are. --- */
+
+struct connctx {
+ const void *t;
+ char *data;
+ int datalen, datasize;
+};
+
+/*
+ * Called when a new connection arrives on a listening socket.
+ * Returns a connctx for the new connection.
+ */
+struct connctx *new_connection(const void *t)
+{
+ struct connctx *cctx = snew(struct connctx);
+ cctx->t = t;
+ cctx->data = NULL;
+ cctx->datalen = cctx->datasize = 0;
+ return cctx;
+}
+
+void free_connection(struct connctx *cctx)
+{
+ sfree(cctx->data);
+ sfree(cctx);
+}
+
+static char *http_error(char *code, char *errmsg, char *errtext, ...)
+{
+ return dupfmt("HTTP/1.1 %s %s\r\n"
+ "Date: %D\r\n"
+ "Server: agedu\r\n"
+ "Connection: close\r\n"
+ "Content-Type: text/html; charset=US-ASCII\r\n"
+ "\r\n"
+ "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\r\n"
+ "<HTML><HEAD>\r\n"
+ "<TITLE>%s %s</TITLE>\r\n"
+ "</HEAD><BODY>\r\n"
+ "<H1>%s %s</H1>\r\n"
+ "<P>%s</P>\r\n"
+ "</BODY></HTML>\r\n", code, errmsg,
+ code, errmsg, code, errmsg, errtext);
+}
+
+static char *http_success(char *mimetype, int stuff_cr, char *document)
+{
+ return dupfmt("HTTP/1.1 200 OK\r\n"
+ "Date: %D\r\n"
+ "Expires: %D\r\n"
+ "Server: agedu\r\n"
+ "Connection: close\r\n"
+ "Content-Type: %s\r\n"
+ "\r\n"
+ "%S", mimetype, stuff_cr, document);
+}
+
+/*
+ * Called when data comes in on a connection.
+ *
+ * If this function returns NULL, the platform code continues
+ * reading from the socket. Otherwise, it returns some dynamically
+ * allocated data which the platform code will then write to the
+ * socket before closing it.
+ */
+char *got_data(struct connctx *ctx, char *data, int length, int magic_access)
+{
+ char *line, *p, *q, *z1, *z2, c1, c2;
+ unsigned long index;
+ char *document, *ret;
+
+ if (ctx->datasize < ctx->datalen + length) {
+ ctx->datasize = (ctx->datalen + length) * 3 / 2 + 4096;
+ ctx->data = sresize(ctx->data, ctx->datasize, char);
+ }
+ memcpy(ctx->data + ctx->datalen, data, length);
+ ctx->datalen += length;
+
+ /*
+ * See if we have enough of an HTTP request to work out our
+ * response.
+ */
+ line = ctx->data;
+ /*
+ * RFC 2616 section 4.1: `In the interest of robustness, [...]
+ * if the server is reading the protocol stream at the
+ * beginning of a message and receives a CRLF first, it should
+ * ignore the CRLF.'
+ */
+ while (line - ctx->data < ctx->datalen &&
+ (*line == '\r' || *line == '\n'))
+ line++;
+
+ q = line;
+ while (q - ctx->data < ctx->datalen && *q != '\r' && *q != '\n')
+ q++;
+ if (q - ctx->data >= ctx->datalen)
+ return NULL; /* not got request line yet */
+
+ /*
+ * We've got the first line of the request, which is enough
+ * for us to work out what to say in response and start
+ * sending it. The platform side will keep reading data, but
+ * it'll ignore it.
+ *
+ * So, zero-terminate our line and parse it.
+ */
+ *q = '\0';
+ z1 = z2 = q;
+ c1 = c2 = *q;
+ p = line;
+ while (*p && !isspace((unsigned char)*p)) p++;
+ if (*p) {
+ z1 = p++;
+ c1 = *z1;
+ *z1 = '\0';
+ }
+ while (*p && isspace((unsigned char)*p)) p++;
+ q = p;
+ while (*q && !isspace((unsigned char)*q)) q++;
+ z2 = q;
+ c2 = *z2;
+ *z2 = '\0';
+
+ /*
+ * Now `line' points at the method name; p points at the URL,
+ * if any.
+ */
+
+ /*
+ * There should _be_ a URL, on any request type at all.
+ */
+ if (!*p) {
+ char *ret, *text;
+ *z2 = c2;
+ *z1 = c1;
+ text = dupfmt("<code>agedu</code> received the HTTP request"
+ " \"<code>%s</code>\", which contains no URL.",
+ line);
+ ret = http_error("400", "Bad request", text);
+ sfree(text);
+ return ret;
+ }
+
+ if (!magic_access) {
+ ret = http_error("403", "Forbidden",
+ "This is a restricted-access set of pages.");
+ } else {
+ p += strspn(p, "/?");
+ index = strtoul(p, NULL, 10);
+ document = html_query(ctx->t, index, "%lu");
+ if (document) {
+ ret = http_success("text/html", 1, document);
+ sfree(document);
+ } else {
+ ret = http_error("404", "Not Found",
+ "Pathname index out of range.");
+ }
+ }
+ return ret;
+}
+
+/* --- Platform support for running a web server. --- */
+
+enum { FD_CLIENT, FD_LISTENER, FD_CONNECTION };
+
+struct fd {
+ int fd;
+ int type;
+ int deleted;
+ char *wdata;
+ int wdatalen, wdatapos;
+ int magic_access;
+ struct connctx *cctx;
+};
+
+struct fd *fds = NULL;
+int nfds = 0, fdsize = 0;
+
+struct fd *new_fdstruct(int fd, int type)
+{
+ struct fd *ret;
+
+ if (nfds >= fdsize) {
+ fdsize = nfds * 3 / 2 + 32;
+ fds = sresize(fds, fdsize, struct fd);
+ }
+
+ ret = &fds[nfds++];
+
+ ret->fd = fd;
+ ret->type = type;
+ ret->wdata = NULL;
+ ret->wdatalen = ret->wdatapos = 0;
+ ret->cctx = NULL;
+ ret->deleted = 0;
+ ret->magic_access = 0;
+
+ return ret;
+}
+
+void check_magic_access(struct fd *fd)
+{
+ struct sockaddr_in sock, peer;
+ socklen_t addrlen;
+ char linebuf[4096], matchbuf[80];
+ FILE *fp;
+
+ addrlen = sizeof(sock);
+ if (getsockname(fd->fd, (struct sockaddr *)&sock, &addrlen)) {
+ fprintf(stderr, "getsockname: %s\n", strerror(errno));
+ exit(1);
+ }
+ addrlen = sizeof(peer);
+ if (getpeername(fd->fd, (struct sockaddr *)&peer, &addrlen)) {
+ fprintf(stderr, "getpeername: %s\n", strerror(errno));
+ exit(1);
+ }
+
+ sprintf(matchbuf, "%08X:%04X %08X:%04X",
+ peer.sin_addr.s_addr, ntohs(peer.sin_port),
+ sock.sin_addr.s_addr, ntohs(sock.sin_port));
+ fp = fopen("/proc/net/tcp", "r");
+ if (fp) {
+ while (fgets(linebuf, sizeof(linebuf), fp)) {
+ if (strlen(linebuf) >= 75 &&
+ !strncmp(linebuf+6, matchbuf, strlen(matchbuf))) {
+ int uid = atoi(linebuf + 75);
+ if (uid == getuid())
+ fd->magic_access = 1;
+ }
+ }
+ }
+}
+
+void run_httpd(const void *t)
+{
+ int fd;
+ unsigned long ipaddr;
+ struct fd *f;
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+
+ /*
+ * Establish the listening socket and retrieve its port
+ * number.
+ */
+ fd = socket(PF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "socket(PF_INET): %s\n", strerror(errno));
+ exit(1);
+ }
+ addr.sin_family = AF_INET;
+ srand(0L);
+ ipaddr = 0x7f000000;
+ ipaddr += (1 + rand() % 255) << 16;
+ ipaddr += (1 + rand() % 255) << 8;
+ ipaddr += (1 + rand() % 255);
+ addr.sin_addr.s_addr = htonl(ipaddr);
+ addr.sin_port = htons(0);
+ addrlen = sizeof(addr);
+ if (bind(fd, (struct sockaddr *)&addr, addrlen) < 0) {
+ fprintf(stderr, "bind: %s\n", strerror(errno));
+ exit(1);
+ }
+ if (listen(fd, 5) < 0) {
+ fprintf(stderr, "listen: %s\n", strerror(errno));
+ exit(1);
+ }
+ addrlen = sizeof(addr);
+ if (getsockname(fd, (struct sockaddr *)&addr, &addrlen)) {
+ fprintf(stderr, "getsockname: %s\n", strerror(errno));
+ exit(1);
+ }
+ printf("Server is at http://%s:%d/\n",
+ inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
+
+ /*
+ * Now construct an fd structure to hold it.
+ */
+ f = new_fdstruct(fd, FD_LISTENER);
+
+ /*
+ * Read from standard input, and treat EOF as a notification
+ * to exit.
+ */
+ new_fdstruct(0, FD_CLIENT);
+
+ /*
+ * Now we're ready to run our main loop. Keep looping round on
+ * select.
+ */
+ while (1) {
+ fd_set rfds, wfds;
+ int i, j, maxfd, ret;
+
+#define FD_SET_MAX(fd, set, max) \
+ do { FD_SET((fd),(set)); (max) = ((max)<=(fd)?(fd)+1:(max)); } while(0)
+
+ /*
+ * Loop round the fd list putting fds into our select
+ * sets. Also in this loop we remove any that were marked
+ * as deleted in the previous loop.
+ */
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+ maxfd = 0;
+ for (i = j = 0; j < nfds; j++) {
+
+ if (fds[j].deleted) {
+ sfree(fds[j].wdata);
+ free_connection(fds[j].cctx);
+ continue;
+ }
+ fds[i] = fds[j];
+
+ switch (fds[i].type) {
+ case FD_CLIENT:
+ case FD_LISTENER:
+ FD_SET_MAX(fds[i].fd, &rfds, maxfd);
+ break;
+ case FD_CONNECTION:
+ /*
+ * Always read from a connection socket. Even
+ * after we've started writing, the peer might
+ * still be sending (e.g. because we shamefully
+ * jumped the gun before waiting for the end of
+ * the HTTP request) and so we should be prepared
+ * to read data and throw it away.
+ */
+ FD_SET_MAX(fds[i].fd, &rfds, maxfd);
+ /*
+ * Also attempt to write, if we have data to write.
+ */
+ if (fds[i].wdatapos < fds[i].wdatalen)
+ FD_SET_MAX(fds[i].fd, &wfds, maxfd);
+ break;
+ }
+
+ i++;
+ }
+ nfds = i;
+
+ ret = select(maxfd, &rfds, &wfds, NULL, NULL);
+ if (ret <= 0) {
+ if (ret < 0 && (errno != EINTR)) {
+ fprintf(stderr, "select: %s", strerror(errno));
+ exit(1);
+ }
+ continue;
+ }
+
+ for (i = 0; i < nfds; i++) {
+ switch (fds[i].type) {
+ case FD_CLIENT:
+ if (FD_ISSET(fds[i].fd, &rfds)) {
+ char buf[4096];
+ int ret = read(fds[i].fd, buf, sizeof(buf));
+ if (ret <= 0) {
+ if (ret < 0) {
+ fprintf(stderr, "standard input: read: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+ return;
+ }
+ }
+ break;
+ case FD_LISTENER:
+ if (FD_ISSET(fds[i].fd, &rfds)) {
+ /*
+ * New connection has come in. Accept it.
+ */
+ struct fd *f;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ int newfd = accept(fds[i].fd, (struct sockaddr *)&addr,
+ &addrlen);
+ if (newfd < 0)
+ break; /* not sure what happened there */
+
+ f = new_fdstruct(newfd, FD_CONNECTION);
+ f->cctx = new_connection(t);
+ check_magic_access(f);
+ }
+ break;
+ case FD_CONNECTION:
+ if (FD_ISSET(fds[i].fd, &rfds)) {
+ /*
+ * There's data to be read.
+ */
+ char readbuf[4096];
+ int ret;
+
+ ret = read(fds[i].fd, readbuf, sizeof(readbuf));
+ if (ret <= 0) {
+ /*
+ * This shouldn't happen in a sensible
+ * HTTP connection, so we abandon the
+ * connection if it does.
+ */
+ close(fds[i].fd);
+ fds[i].deleted = 1;
+ break;
+ } else {
+ if (!fds[i].wdata) {
+ /*
+ * If we haven't got an HTTP response
+ * yet, keep processing data in the
+ * hope of acquiring one.
+ */
+ fds[i].wdata = got_data(fds[i].cctx, readbuf,
+ ret, fds[i].magic_access);
+ if (fds[i].wdata) {
+ fds[i].wdatalen = strlen(fds[i].wdata);
+ fds[i].wdatapos = 0;
+ }
+ } else {
+ /*
+ * Otherwise, just drop our read data
+ * on the floor.
+ */
+ }
+ }
+ }
+ if (FD_ISSET(fds[i].fd, &wfds) &&
+ fds[i].wdatapos < fds[i].wdatalen) {
+ /*
+ * The socket is writable, and we have data to
+ * write. Write it.
+ */
+ int ret = write(fds[i].fd, fds[i].wdata + fds[i].wdatapos,
+ fds[i].wdatalen - fds[i].wdatapos);
+ if (ret <= 0) {
+ /*
+ * Shouldn't happen; abandon the connection.
+ */
+ close(fds[i].fd);
+ fds[i].deleted = 1;
+ break;
+ } else {
+ fds[i].wdatapos += ret;
+ if (fds[i].wdatapos == fds[i].wdatalen) {
+ shutdown(fds[i].fd, SHUT_WR);
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ }
+}
--- /dev/null
+/*
+ * httpd.h: a minimal custom HTTP server designed to serve the
+ * pages generated by html.h.
+ */
+
+void run_httpd(const void *t);
--- /dev/null
+/*
+ * index.c: Implementation of index.h.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stddef.h>
+
+#include "trie.h"
+#include "index.h"
+#include "malloc.h"
+
+#define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
+
+#define min(x,y) ((x)<(y) ? (x):(y))
+#define max(x,y) ((x)>(y) ? (x):(y))
+
+#define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) )
+
+struct avlnode {
+ off_t children[2], element;
+ int maxdepth; /* maximum depth of this subtree */
+ unsigned long long totalsize;
+};
+
+static int index_navlnodes(int nodecount)
+{
+ int b, c, maxdepth, ret;
+
+ /*
+ * Model the tree growing at maximum imbalance. We do this by
+ * determining the number of nodes in the most unbalanced
+ * (i.e. smallest) tree of any given depth, and stopping when
+ * that's larger than nodecount.
+ */
+ ret = 0;
+ maxdepth = 1;
+ b = 0;
+ c = 1;
+ while (b <= nodecount) {
+ int tmp;
+
+ /*
+ * A tree with at most b nodes can be at most depth
+ * maxdepth-1. A tree with more than b but at most c can
+ * be at most maxdepth. Therefore, for each tree size
+ * between b and c, we might need to adjust maxdepth+1
+ * nodes.
+ */
+ tmp = min(c, nodecount);
+ ret += (tmp - b) * maxdepth;
+
+ tmp = 1 + b + c;
+ b = c;
+ c = tmp;
+ maxdepth++;
+ }
+
+ return ret;
+}
+
+off_t index_compute_size(off_t currentsize, int nodecount)
+{
+ currentsize += PADDING(currentsize, alignof(off_t));
+ currentsize += nodecount + sizeof(off_t);
+ currentsize += PADDING(currentsize, alignof(struct avlnode));
+ currentsize += index_navlnodes(nodecount) * sizeof(struct avlnode);
+
+ return currentsize;
+}
+
+/* ----------------------------------------------------------------------
+ * Functions to build the index.
+ */
+
+struct indexbuild {
+ void *t;
+ int n, nnodes, maxnodes;
+ struct avlnode *nodes;
+ off_t *roots;
+ struct avlnode *currroot;
+ struct avlnode *firstmutable;
+};
+
+#define ELEMENT(t,offset) \
+ ((offset) ? (struct trie_file *)((char *)(t) + (offset)) : NULL)
+#define NODE(t,offset) \
+ ((offset) ? (struct avlnode *)((char *)(t) + (offset)) : NULL)
+#define OFFSET(t,node) \
+ ((node) ? (off_t)((const char *)node - (const char *)t) : 0)
+#define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0)
+
+indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount)
+{
+ indexbuild *ib = snew(indexbuild);
+
+ ib->t = t;
+ startoff += PADDING(startoff, alignof(off_t));
+ ib->roots = (off_t *)((char *)t + startoff);
+ trie_set_index_offset(t, startoff);
+ startoff += nodecount * sizeof(off_t);
+ startoff += PADDING(startoff, alignof(struct avlnode));
+ ib->nodes = (struct avlnode *)((char *)t + startoff);
+ ib->maxnodes = index_navlnodes(nodecount);
+ ib->nnodes = ib->n = 0;
+ ib->currroot = NULL;
+ ib->firstmutable = ib->nodes + ib->nnodes;
+
+ return ib;
+}
+
+/*
+ * Return a mutable node, which is n or a copy of n if n is
+ * non-NULL.
+ */
+static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n)
+{
+ struct avlnode *newnode;
+
+ if (n && n >= ib->firstmutable)
+ return n; /* already mutable */
+
+ assert(ib->nnodes < ib->maxnodes);
+ newnode = ib->nodes + ib->nnodes++;
+ if (n)
+ *newnode = *n; /* structure copy */
+ return newnode;
+}
+
+/*
+ * Fix the annotations in a tree node.
+ */
+static void avl_fix(indexbuild *ib, struct avlnode *n)
+{
+ /*
+ * Make sure the max depth field is right.
+ */
+ n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])),
+ MAXDEPTH(NODE(ib->t, n->children[1])));
+
+ n->totalsize =
+ (ELEMENT(ib->t, n->element)->blocks +
+ (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) +
+ (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0));
+}
+
+static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n,
+ off_t node)
+{
+ struct trie_file *newfile;
+ struct trie_file *oldfile;
+ int subtree;
+ struct avlnode *nn;
+
+ /*
+ * Recursion bottoming out: if the subtree we're inserting
+ * into is null, just construct and return a fresh node.
+ */
+ if (!n) {
+ n = avl_makemutable(ib, NULL);
+ n->children[0] = n->children[1] = 0;
+ n->element = node;
+ avl_fix(ib, n);
+ return n;
+ }
+
+ /*
+ * Otherwise, we have to insert into an existing tree.
+ */
+
+ /*
+ * Determine which subtree to insert this node into. Ties
+ * aren't important, so we just break them any old way.
+ */
+ newfile = (struct trie_file *)((char *)ib->t + node);
+ oldfile = (struct trie_file *)((char *)ib->t + n->element);
+ if (newfile->atime > oldfile->atime)
+ subtree = 1;
+ else
+ subtree = 0;
+
+ /*
+ * Construct a copy of the node we're looking at.
+ */
+ n = avl_makemutable(ib, n);
+
+ /*
+ * Recursively insert into the next subtree down.
+ */
+ nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node);
+ n->children[subtree] = OFFSET(ib->t, nn);
+
+ /*
+ * Rebalance if necessary, to ensure that our node's children
+ * differ in maximum depth by at most one. Of course, the
+ * subtree we've just modified will be the deeper one if so.
+ */
+ if (MAXDEPTH(NODE(ib->t, n->children[subtree])) >
+ MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) {
+ struct avlnode *p, *q;
+
+ /*
+ * There are two possible cases, one of which requires a
+ * single tree rotation and the other requires two. It all
+ * depends on which subtree of the next node down (here p)
+ * is the taller. (It turns out that they can't both be
+ * the same height: any tree which has just increased in
+ * depth must have one subtree strictly taller than the
+ * other.)
+ */
+ p = NODE(ib->t, n->children[subtree]);
+ assert(p >= ib->firstmutable);
+ if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >=
+ MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) {
+ /*
+ * n p
+ * / \ / \
+ * [k] p -> n [k+1]
+ * / \ / \
+ * [k] [k+1] [k] [k]
+ */
+ n->children[subtree] = p->children[1-subtree];
+ p->children[1-subtree] = OFFSET(ib->t, n);
+ avl_fix(ib, n);
+ n = p;
+ } else {
+ q = NODE(ib->t, p->children[1-subtree]);
+ assert(q >= ib->firstmutable);
+ p->children[1-subtree] = OFFSET(ib->t, q);
+ /*
+ * n n q
+ * / \ / \ / \
+ * [k] p == [k] p -> n p
+ * / \ / \ / \ / \
+ * [k+1] [k] q [k] [k] \ / [k]
+ * / \ [k-1,k] [k-1,k]
+ * [k-1,k] [k-1,k]
+ */
+ n->children[subtree] = q->children[1-subtree];
+ p->children[1-subtree] = q->children[subtree];
+ q->children[1-subtree] = OFFSET(ib->t, n);
+ q->children[subtree] = OFFSET(ib->t, p);
+ avl_fix(ib, n);
+ avl_fix(ib, p);
+ n = q;
+ }
+ }
+
+ /*
+ * Fix up our maximum depth field.
+ */
+ avl_fix(ib, n);
+
+ /*
+ * Done.
+ */
+ return n;
+}
+
+void indexbuild_add(indexbuild *ib, const struct trie_file *tf)
+{
+ off_t node = OFFSET(ib->t, tf);
+ ib->currroot = avl_insert(ib, ib->currroot, node);
+ ib->roots[ib->n++] = OFFSET(ib->t, ib->currroot);
+ ib->firstmutable = ib->nodes + ib->nnodes;
+}
+
+off_t indexbuild_realsize(indexbuild *ib)
+{
+ return OFFSET(ib->t, (ib->nodes + ib->nnodes));
+}
+
+void indexbuild_free(indexbuild *ib)
+{
+ sfree(ib);
+}
+
+unsigned long long index_query(const void *t, int n, unsigned long long at)
+{
+ const off_t *roots;
+ const struct avlnode *node;
+ unsigned long count;
+ unsigned long long ret;
+
+ roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
+
+ if (n < 1)
+ return 0;
+ count = trie_count(t);
+ if (n > count)
+ n = count;
+
+ node = NODE(t, roots[n-1]);
+
+ ret = 0;
+
+ while (node) {
+ const struct trie_file *tf = ELEMENT(t, node->element);
+ const struct avlnode *left = NODE(t, node->children[0]);
+ const struct avlnode *right = NODE(t, node->children[1]);
+
+ if (at <= tf->atime) {
+ node = left;
+ } else {
+ if (left)
+ ret += left->totalsize;
+ ret += tf->blocks;
+ node = right;
+ }
+ }
+
+ return ret;
+}
+
+unsigned long long index_order_stat(const void *t, double f)
+{
+ const off_t *roots;
+ const struct avlnode *node;
+ unsigned long count;
+ unsigned long long size;
+
+ roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
+ count = trie_count(t);
+ node = NODE(t, roots[count-1]);
+
+ size = node->totalsize * f;
+ assert(size <= node->totalsize);
+
+ while (1) {
+ const struct trie_file *tf = ELEMENT(t, node->element);
+ const struct avlnode *left = NODE(t, node->children[0]);
+ const struct avlnode *right = NODE(t, node->children[1]);
+
+ if (left && size < left->totalsize) {
+ node = left;
+ } else if (!right ||
+ size < (left ? left->totalsize : 0) + tf->blocks) {
+ return tf->atime;
+ } else {
+ if (left)
+ size -= left->totalsize;
+ size -= tf->blocks;
+ node = right;
+ }
+ }
+}
--- /dev/null
+/*
+ * index.h: Manage indexes for agedu.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Given the size of an existing data file and the number of
+ * entries required in the index, calculate and return an upper
+ * bound on the required size of the index file after an index has
+ * been written on to the end of it.
+ */
+off_t index_compute_size(off_t currentsize, int nodecount);
+
+/*
+ * Build an index, given the address of a memory-mapped data file
+ * and the starting offset within it.
+ *
+ * trie_file structures passed to tf must of course be within the
+ * bounds of the memory-mapped file.
+ *
+ * indexbuild_realsize returns the total amount of data _actually_
+ * written into the file, to allow it to be truncated afterwards.
+ */
+typedef struct indexbuild indexbuild;
+indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount);
+void indexbuild_add(indexbuild *ib, const struct trie_file *tf);
+off_t indexbuild_realsize(indexbuild *ib);
+void indexbuild_free(indexbuild *ib);
+
+/*
+ * Query an index to find the total size of records with name
+ * index strictly less than n, with atime less than at.
+ */
+unsigned long long index_query(const void *t, int n, unsigned long long at);
+
+/*
+ * Retrieve an order statistic from the index: given a fraction f,
+ * return an atime such that (at most) the requested fraction of
+ * the total data is older than it.
+ */
+unsigned long long index_order_stat(const void *t, double f);
--- /dev/null
+/*
+ * malloc.c: implementation of malloc.h
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <time.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "malloc.h"
+
+#define lenof(x) (sizeof((x))/sizeof(*(x)))
+
+extern void fatal(const char *, ...);
+
+void *smalloc(size_t size) {
+ void *p;
+ p = malloc(size);
+ if (!p) {
+ fatal("out of memory");
+ }
+ return p;
+}
+
+void sfree(void *p) {
+ if (p) {
+ free(p);
+ }
+}
+
+void *srealloc(void *p, size_t size) {
+ void *q;
+ if (p) {
+ q = realloc(p, size);
+ } else {
+ q = malloc(size);
+ }
+ if (!q)
+ fatal("out of memory");
+ return q;
+}
+
+char *dupstr(const char *s) {
+ char *r = smalloc(1+strlen(s));
+ strcpy(r,s);
+ return r;
+}
+
+char *dupfmt(const char *fmt, ...)
+{
+ int pass;
+ int totallen;
+ char *ret = NULL, *rp = NULL;
+ char datebuf[80];
+ va_list ap;
+ time_t t;
+ struct tm tm;
+ int got_time = 0;
+
+ datebuf[0] = '\0';
+ totallen = 0;
+
+ for (pass = 0; pass < 2; pass++) {
+ const char *p = fmt;
+
+ va_start(ap, fmt);
+
+ while (*p) {
+ const char *data = NULL;
+ int datalen = 0, stuffcr = 0;
+
+ if (*p == '%') {
+ p++;
+ if (*p == 'D') {
+ if (!datebuf[0]) {
+ if (!got_time) {
+ t = time(NULL);
+ tm = *gmtime(&t);
+ got_time = 1;
+ }
+ strftime(datebuf, lenof(datebuf),
+ "%a, %d %b %Y %H:%M:%S GMT", &tm);
+ }
+ data = datebuf;
+ datalen = strlen(data);
+ } else if (*p == 'd') {
+ int i = va_arg(ap, int);
+ sprintf(datebuf, "%d", i);
+ data = datebuf;
+ datalen = strlen(data);
+ } else if (*p == 's') {
+ data = va_arg(ap, const char *);
+ datalen = strlen(data);
+ } else if (assert(*p == 'S'), 1) {
+ stuffcr = va_arg(ap, int);
+ data = va_arg(ap, const char *);
+ datalen = strlen(data);
+ }
+ p++;
+ } else {
+ data = p;
+ while (*p && *p != '%') p++;
+ datalen = p - data;
+ }
+
+ if (pass == 0) {
+ totallen += datalen;
+ if (stuffcr) {
+ while (datalen > 0) {
+ if (*data == '\n')
+ totallen++;
+ data++, datalen--;
+ }
+ }
+ } else {
+ while (datalen > 0) {
+ if (stuffcr && *data == '\n')
+ *rp++ = '\r';
+ *rp++ = *data++;
+ datalen--;
+ }
+ }
+ }
+
+ va_end(ap);
+
+ if (pass == 0) {
+ rp = ret = snewn(totallen+1, char);
+ } else {
+ assert(rp - ret == totallen);
+ *rp = '\0';
+ }
+ }
+
+ return ret;
+}
--- /dev/null
+/*
+ * malloc.h: safe wrappers around malloc, realloc, free, strdup
+ */
+
+#ifndef AGEDU_MALLOC_H
+#define AGEDU_MALLOC_H
+
+#include <stddef.h>
+
+/*
+ * smalloc should guarantee to return a useful pointer.
+ */
+void *smalloc(size_t size);
+
+/*
+ * srealloc should guaranteeably be able to realloc NULL
+ */
+void *srealloc(void *p, size_t size);
+
+/*
+ * sfree should guaranteeably deal gracefully with freeing NULL
+ */
+void sfree(void *p);
+
+/*
+ * dupstr is like strdup, but with the never-return-NULL property
+ * of smalloc (and also reliably defined in all environments :-)
+ */
+char *dupstr(const char *s);
+
+/*
+ * dupfmt is a bit like printf, but does its own allocation and
+ * returns a dynamic string. It also supports a different (and
+ * much less featureful) set of format directives:
+ *
+ * - %D takes no argument, and gives the current date and time in
+ * a format suitable for an HTTP Date header
+ * - %d takes an int argument and formats it like normal %d (but
+ * doesn't support any of the configurability of standard
+ * printf)
+ * - %s takes a const char * and formats it like normal %s
+ * (again, no fine-tuning available)
+ * - %S takes an int followed by a const char *. If the int is
+ * zero, it behaves just like %s. If the int is nonzero, it
+ * transforms the string by stuffing a \r before every \n.
+ */
+char *dupfmt(const char *fmt, ...);
+
+/*
+ * snew allocates one instance of a given type, and casts the
+ * result so as to type-check that you're assigning it to the
+ * right kind of pointer. Protects against allocation bugs
+ * involving allocating the wrong size of thing.
+ */
+#define snew(type) \
+ ( (type *) smalloc (sizeof (type)) )
+
+/*
+ * snewn allocates n instances of a given type, for arrays.
+ */
+#define snewn(number, type) \
+ ( (type *) smalloc ((number) * sizeof (type)) )
+
+/*
+ * sresize wraps realloc so that you specify the new number of
+ * elements and the type of the element, with the same type-
+ * checking advantages. Also type-checks the input pointer.
+ */
+#define sresize(array, number, type) \
+ ( (void)sizeof((array)-(type *)0), \
+ (type *) srealloc ((array), (number) * sizeof (type)) )
+
+#endif /* AGEDU_MALLOC_H */
--- /dev/null
+/*
+ * trie.c: implementation of trie.h.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "malloc.h"
+#include "trie.h"
+
+#define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
+
+/*
+ * Compare functions for pathnames. Returns the relative order of
+ * the names, like strcmp; also passes back the offset of the
+ * first differing character if desired.
+ */
+static int trieccmp(unsigned char a, unsigned char b)
+{
+ a = (a == '\0' ? '\0' : a == '/' ? '\1' : a+1);
+ b = (b == '\0' ? '\0' : b == '/' ? '\1' : b+1);
+ return a - b;
+}
+
+static int triencmp(const char *a, size_t alen,
+ const char *b, size_t blen, int *offset)
+{
+ int off = 0;
+ while (off < alen && off < blen && a[off] == b[off])
+ off++;
+ if (offset)
+ *offset = off;
+ if (off == alen || off == blen) return (off == blen) - (off == alen);
+ return trieccmp(a[off], b[off]);
+}
+
+static int triecmp(const char *a, const char *b, int *offset)
+{
+ return triencmp(a, strlen(a), b, strlen(b), offset);
+}
+
+/* ----------------------------------------------------------------------
+ * Trie node structures.
+ *
+ * The trie format stored in the file consists of three distinct
+ * node types, each with a distinguishing type field at the start.
+ *
+ * TRIE_LEAF is a leaf node; it contains an actual trie_file
+ * structure, and indicates that when you're searching down the
+ * trie with a string, you should now expect to encounter
+ * end-of-string.
+ *
+ * TRIE_SWITCH indicates that the set of strings in the trie
+ * include strings with more than one distinct character after the
+ * prefix leading up to this point. Hence, it stores multiple
+ * subnode pointers and a different single character for each one.
+ *
+ * TRIE_STRING indicates that at this point everything in the trie
+ * has the same next few characters; it stores a single mandatory
+ * string fragment and exactly one subnode pointer.
+ */
+enum {
+ TRIE_LEAF = 0x7fffe000,
+ TRIE_SWITCH,
+ TRIE_STRING
+};
+
+struct trie_common {
+ int type;
+};
+
+struct trie_switchentry {
+ off_t subnode;
+ int subcount;
+};
+
+struct trie_leaf {
+ struct trie_common c;
+ struct trie_file file;
+};
+
+struct trie_switch {
+ struct trie_common c;
+ /*
+ * sw[0] to sw[len-1] give the subnode pointers and element
+ * counts. At &sw[len] is stored len single bytes which are
+ * the characters corresponding to each subnode.
+ */
+ int len;
+ struct trie_switchentry sw[];
+};
+
+struct trie_string {
+ struct trie_common c;
+ int stringlen;
+ off_t subnode;
+ char string[];
+};
+
+struct trie_header {
+ unsigned long magic;
+ off_t root, indexroot;
+ int count;
+ size_t maxpathlen;
+};
+
+/* Union only used for computing alignment */
+union trie_node {
+ struct trie_leaf leaf;
+ struct { /* fake trie_switch with indeterminate array length filled in */
+ struct trie_common c;
+ int len;
+ struct trie_switchentry sw[1];
+ } sw;
+ struct { /* fake trie_string with indeterminate array length filled in */
+ struct trie_common c;
+ int stringlen;
+ off_t subnode;
+ char string[1];
+ } str;
+};
+#define TRIE_MAGIC 0x75646761UL
+#define TRIE_ALIGN alignof(union trie_node)
+
+/* ----------------------------------------------------------------------
+ * Trie-building functions.
+ */
+
+struct tbswitch {
+ int len;
+ char c[256];
+ off_t off[256];
+ int count[256];
+};
+
+struct triebuild {
+ int fd;
+ off_t offset;
+ char *lastpath;
+ int lastlen, lastsize;
+ off_t lastoff;
+ struct tbswitch *switches;
+ int switchsize;
+ size_t maxpathlen;
+};
+
+static void tb_seek(triebuild *tb, off_t off)
+{
+ tb->offset = off;
+ if (lseek(tb->fd, off, SEEK_SET) < 0) {
+ fprintf(stderr, "agedu: lseek: %s\n", strerror(errno));
+ exit(1);
+ }
+}
+
+static void tb_write(triebuild *tb, const void *buf, size_t len)
+{
+ tb->offset += len;
+ while (len > 0) {
+ int ret = write(tb->fd, buf, len);
+ if (ret < 0) {
+ fprintf(stderr, "agedu: write: %s\n", strerror(errno));
+ exit(1);
+ }
+ len -= ret;
+ buf = (const void *)((const char *)buf + ret);
+ }
+}
+
+static char trie_align_zeroes[TRIE_ALIGN];
+
+static void tb_align(triebuild *tb)
+{
+ int off = (TRIE_ALIGN - ((tb)->offset % TRIE_ALIGN)) % TRIE_ALIGN;
+ tb_write(tb, trie_align_zeroes, off);
+}
+
+triebuild *triebuild_new(int fd)
+{
+ triebuild *tb = snew(triebuild);
+ struct trie_header th;
+
+ tb->fd = fd;
+ tb->lastpath = NULL;
+ tb->lastlen = tb->lastsize = 0;
+ tb->lastoff = 0;
+ tb->switches = NULL;
+ tb->switchsize = 0;
+ tb->maxpathlen = 0;
+
+ th.magic = TRIE_MAGIC;
+ th.root = th.count = 0;
+ th.indexroot = 0;
+ th.maxpathlen = 0;
+
+ tb_seek(tb, 0);
+ tb_write(tb, &th, sizeof(th));
+
+ return tb;
+}
+
+static off_t triebuild_unwind(triebuild *tb, int targetdepth, int *outcount)
+{
+ off_t offset;
+ int count, depth;
+
+ if (tb->lastoff == 0) {
+ *outcount = 0;
+ return 0;
+ }
+
+ offset = tb->lastoff;
+ count = 1;
+ depth = tb->lastlen + 1;
+
+ assert(depth >= targetdepth);
+
+ while (depth > targetdepth) {
+ int odepth = depth;
+ while (depth > targetdepth &&
+ (depth-1 > tb->switchsize || tb->switches[depth-1].len == 0))
+ depth--;
+ if (odepth > depth) {
+ /*
+ * Write out a string node.
+ */
+ size_t nodesize = sizeof(struct trie_string) + odepth - depth;
+ struct trie_string *st = (struct trie_string *)smalloc(nodesize);
+ st->c.type = TRIE_STRING;
+ st->stringlen = odepth - depth;
+ st->subnode = offset;
+ memcpy(st->string, tb->lastpath + depth, odepth - depth);
+ tb_align(tb);
+ offset = tb->offset;
+ tb_write(tb, st, nodesize);
+ sfree(st);
+ }
+
+ assert(depth >= targetdepth);
+ if (depth <= targetdepth)
+ break;
+
+ /*
+ * Now we expect to be sitting just below a switch node.
+ * Add our final entry to it and write it out.
+ */
+ depth--;
+ {
+ struct trie_switch *sw;
+ char *chars;
+ size_t nodesize;
+ int swlen = tb->switches[depth].len;
+ int i;
+
+ assert(swlen > 0);
+
+ tb->switches[depth].c[swlen] = tb->lastpath[depth];
+ tb->switches[depth].off[swlen] = offset;
+ tb->switches[depth].count[swlen] = count;
+ swlen++;
+
+ nodesize = sizeof(struct trie_switch) +
+ swlen * sizeof(struct trie_switchentry) + swlen;
+ sw = (struct trie_switch *)smalloc(nodesize);
+ chars = (char *)&sw->sw[swlen];
+
+ sw->c.type = TRIE_SWITCH;
+ sw->len = swlen;
+ count = 0;
+ for (i = 0; i < swlen; i++) {
+ sw->sw[i].subnode = tb->switches[depth].off[i];
+ sw->sw[i].subcount = tb->switches[depth].count[i];
+ chars[i] = tb->switches[depth].c[i];
+
+ count += tb->switches[depth].count[i];
+ }
+
+ tb_align(tb);
+ offset = tb->offset;
+ tb_write(tb, sw, nodesize);
+ sfree(sw);
+
+ tb->switches[depth].len = 0; /* clear this node */
+ }
+ }
+
+ *outcount = count;
+ return offset;
+}
+
+void triebuild_add(triebuild *tb, const char *pathname,
+ const struct trie_file *file)
+{
+ int pathlen = strlen(pathname);
+ int depth;
+
+ if (tb->maxpathlen < pathlen+1)
+ tb->maxpathlen = pathlen+1;
+
+ if (tb->lastpath) {
+ off_t offset;
+ int count;
+
+ /*
+ * Find the first differing character between this pathname
+ * and the previous one.
+ */
+ int ret = triecmp(tb->lastpath, pathname, &depth);
+ assert(ret < 0);
+
+ /*
+ * Finalise all nodes above this depth.
+ */
+ offset = triebuild_unwind(tb, depth+1, &count);
+
+ /*
+ * Add the final node we just acquired to the switch node
+ * at our chosen depth, creating it if it isn't already
+ * there.
+ */
+ if (tb->switchsize <= depth) {
+ int oldsize = tb->switchsize;
+ tb->switchsize = depth * 3 / 2 + 64;
+ tb->switches = sresize(tb->switches, tb->switchsize,
+ struct tbswitch);
+ while (oldsize < tb->switchsize)
+ tb->switches[oldsize++].len = 0;
+ }
+
+ tb->switches[depth].c[tb->switches[depth].len] = tb->lastpath[depth];
+ tb->switches[depth].off[tb->switches[depth].len] = offset;
+ tb->switches[depth].count[tb->switches[depth].len] = count;
+ tb->switches[depth].len++;
+ }
+
+ /*
+ * Write out a leaf node for the new file, and remember its
+ * file offset.
+ */
+ {
+ struct trie_leaf leaf;
+
+ leaf.c.type = TRIE_LEAF;
+ leaf.file = *file; /* structure copy */
+
+ tb_align(tb);
+ tb->lastoff = tb->offset;
+ tb_write(tb, &leaf, sizeof(leaf));
+ }
+
+ /*
+ * Store this pathname for comparison with the next one.
+ */
+ if (tb->lastsize < pathlen+1) {
+ tb->lastsize = pathlen * 3 / 2 + 64;
+ tb->lastpath = sresize(tb->lastpath, tb->lastsize, char);
+ }
+ strcpy(tb->lastpath, pathname);
+ tb->lastlen = pathlen;
+}
+
+int triebuild_finish(triebuild *tb)
+{
+ struct trie_header th;
+
+ th.magic = TRIE_MAGIC;
+ th.root = triebuild_unwind(tb, 0, &th.count);
+ th.indexroot = 0;
+ th.maxpathlen = tb->maxpathlen;
+
+ tb_seek(tb, 0);
+ tb_write(tb, &th, sizeof(th));
+
+ return th.count;
+}
+
+void triebuild_free(triebuild *tb)
+{
+ sfree(tb->switches);
+ sfree(tb->lastpath);
+ sfree(tb);
+}
+
+/* ----------------------------------------------------------------------
+ * Querying functions.
+ */
+
+#define NODE(t, off, type) \
+ ((const struct type *)((const char *)(t) + (off)))
+
+size_t trie_maxpathlen(const void *t)
+{
+ const struct trie_header *hdr = NODE(t, 0, trie_header);
+ return hdr->maxpathlen;
+}
+
+unsigned long trie_before(const void *t, const char *pathname)
+{
+ const struct trie_header *hdr = NODE(t, 0, trie_header);
+ int ret = 0, lastcount = hdr->count;
+ int len = 1 + strlen(pathname), depth = 0;
+ off_t off = hdr->root;
+
+ while (1) {
+ const struct trie_common *node = NODE(t, off, trie_common);
+ if (node->type == TRIE_LEAF) {
+ if (depth < len)
+ ret += lastcount; /* _shouldn't_ happen, but in principle */
+ return ret;
+ } else if (node->type == TRIE_STRING) {
+ const struct trie_string *st = NODE(t, off, trie_string);
+
+ int offset;
+ int cmp = triencmp(st->string, st->stringlen,
+ pathname + depth, len-depth, &offset);
+
+ if (offset < st->stringlen) {
+ if (cmp < 0)
+ ret += lastcount;
+ return ret;
+ }
+
+ depth += st->stringlen;
+ off = st->subnode;
+ } else if (node->type == TRIE_SWITCH) {
+ const struct trie_switch *sw = NODE(t, off, trie_switch);
+ const char *chars = (const char *)&sw->sw[sw->len];
+ int i;
+
+ for (i = 0; i < sw->len; i++) {
+ int c = chars[i];
+ int cmp = trieccmp(pathname[depth], c);
+ if (cmp > 0)
+ ret += sw->sw[i].subcount;
+ else if (cmp < 0)
+ return ret;
+ else {
+ off = sw->sw[i].subnode;
+ lastcount = sw->sw[i].subcount;
+ depth++;
+ break;
+ }
+ }
+ if (i == sw->len)
+ return ret;
+ }
+ }
+}
+
+void trie_getpath(const void *t, unsigned long n, char *buf)
+{
+ const struct trie_header *hdr = NODE(t, 0, trie_header);
+ int depth = 0;
+ off_t off = hdr->root;
+
+ while (1) {
+ const struct trie_common *node = NODE(t, off, trie_common);
+ if (node->type == TRIE_LEAF) {
+ assert(depth > 0 && buf[depth-1] == '\0');
+ return;
+ } else if (node->type == TRIE_STRING) {
+ const struct trie_string *st = NODE(t, off, trie_string);
+
+ memcpy(buf + depth, st->string, st->stringlen);
+ depth += st->stringlen;
+ off = st->subnode;
+ } else if (node->type == TRIE_SWITCH) {
+ const struct trie_switch *sw = NODE(t, off, trie_switch);
+ const char *chars = (const char *)&sw->sw[sw->len];
+ int i;
+
+ for (i = 0; i < sw->len; i++) {
+ if (n < sw->sw[i].subcount) {
+ buf[depth++] = chars[i];
+ off = sw->sw[i].subnode;
+ break;
+ } else
+ n -= sw->sw[i].subcount;
+ }
+ assert(i < sw->len);
+ }
+ }
+}
+
+unsigned long trie_count(const void *t)
+{
+ const struct trie_header *hdr = NODE(t, 0, trie_header);
+ return hdr->count;
+}
+
+struct triewalk_switch {
+ const struct trie_switch *sw;
+ int pos, depth, count;
+};
+struct triewalk {
+ const void *t;
+ struct triewalk_switch *switches;
+ int nswitches, switchsize;
+ int count;
+};
+triewalk *triewalk_new(const void *vt)
+{
+ triewalk *tw = snew(triewalk);
+
+ tw->t = (const char *)vt;
+ tw->switches = NULL;
+ tw->switchsize = 0;
+ tw->nswitches = -1;
+ tw->count = 0;
+
+ return tw;
+}
+const struct trie_file *triewalk_next(triewalk *tw, char *buf)
+{
+ off_t off;
+ int depth;
+
+ if (tw->nswitches < 0) {
+ const struct trie_header *hdr = NODE(tw->t, 0, trie_header);
+ off = hdr->root;
+ depth = 0;
+ tw->nswitches = 0;
+ } else {
+ while (1) {
+ int swpos;
+ const struct trie_switch *sw;
+ const char *chars;
+
+ if (tw->nswitches == 0) {
+ assert(tw->count == NODE(tw->t, 0, trie_header)->count);
+ return NULL; /* run out of trie */
+ }
+
+ swpos = tw->switches[tw->nswitches-1].pos;
+ sw = tw->switches[tw->nswitches-1].sw;
+ chars = (const char *)&sw->sw[sw->len];
+
+ if (swpos < sw->len) {
+ depth = tw->switches[tw->nswitches-1].depth;
+ off = sw->sw[swpos].subnode;
+ if (buf)
+ buf[depth++] = chars[swpos];
+ assert(tw->count == tw->switches[tw->nswitches-1].count);
+ tw->switches[tw->nswitches-1].count += sw->sw[swpos].subcount;
+ tw->switches[tw->nswitches-1].pos++;
+ break;
+ }
+
+ tw->nswitches--;
+ }
+ }
+
+ while (1) {
+ const struct trie_common *node = NODE(tw->t, off, trie_common);
+ if (node->type == TRIE_LEAF) {
+ const struct trie_leaf *lf = NODE(tw->t, off, trie_leaf);
+ if (buf)
+ assert(depth > 0 && buf[depth-1] == '\0');
+ tw->count++;
+ return &lf->file;
+ } else if (node->type == TRIE_STRING) {
+ const struct trie_string *st = NODE(tw->t, off, trie_string);
+
+ if (buf)
+ memcpy(buf + depth, st->string, st->stringlen);
+ depth += st->stringlen;
+ off = st->subnode;
+ } else if (node->type == TRIE_SWITCH) {
+ const struct trie_switch *sw = NODE(tw->t, off, trie_switch);
+ const char *chars = (const char *)&sw->sw[sw->len];
+
+ if (tw->nswitches >= tw->switchsize) {
+ tw->switchsize = tw->nswitches * 3 / 2 + 32;
+ tw->switches = sresize(tw->switches, tw->switchsize,
+ struct triewalk_switch);
+ }
+
+ tw->switches[tw->nswitches].sw = sw;
+ tw->switches[tw->nswitches].pos = 1;
+ tw->switches[tw->nswitches].depth = depth;
+ tw->switches[tw->nswitches].count = tw->count + sw->sw[0].subcount;
+ off = sw->sw[0].subnode;
+ if (buf)
+ buf[depth++] = chars[0];
+ tw->nswitches++;
+ }
+ }
+}
+void triewalk_free(triewalk *tw)
+{
+ sfree(tw->switches);
+ sfree(tw);
+}
+
+void trie_set_index_offset(void *t, off_t ptr)
+{
+ ((struct trie_header *)t)->indexroot = ptr;
+}
+off_t trie_get_index_offset(const void *t)
+{
+ return ((const struct trie_header *)t)->indexroot;
+}
--- /dev/null
+/*
+ * trie.h: functions to build and search a trie-like structure
+ * mapping pathnames to file records.
+ */
+
+#include <sys/types.h>
+
+/*
+ * An entry in the trie file describing an actual file.
+ */
+struct trie_file {
+ unsigned long long blocks;
+ unsigned long long atime;
+};
+
+/* ----------------------------------------------------------------------
+ * Functions which can be passed a list of pathnames and file data
+ * in strict order, and will build up a trie and write it out to a
+ * file.
+ */
+
+typedef struct triebuild triebuild;
+
+/*
+ * Create a new trie-building context given a writable file
+ * descriptor, which should point to the start of an empty file.
+ */
+triebuild *triebuild_new(int fd);
+
+/*
+ * Add a trie_file record to the trie. The pathnames should appear
+ * in trie collation order (i.e. strict ASCII sorting order except
+ * that / is moved so that it sorts before any other non-NUL
+ * character), on penalty of assertion failure.
+ */
+void triebuild_add(triebuild *tb, const char *pathname,
+ const struct trie_file *file);
+
+/*
+ * Put the finishing touches to the contents of the output file
+ * once all the trie_file records are present. Returns the total
+ * number of elements in the trie.
+ */
+int triebuild_finish(triebuild *tb);
+
+/*
+ * Free the context. (Does not close the file, but may leave the
+ * file pointer in an arbitrary place.)
+ */
+void triebuild_free(triebuild *tb);
+
+/* ----------------------------------------------------------------------
+ * Functions to query a trie given a pointer to the start of the
+ * memory-mapped file.
+ */
+
+/*
+ * Return the length of the longest pathname stored in the trie,
+ * including its trailing NUL.
+ */
+size_t trie_maxpathlen(const void *t);
+
+/*
+ * Determine the total number of entries in the trie which sort
+ * strictly before the given pathname (irrespective of whether the
+ * pathname actually exists in the trie).
+ */
+unsigned long trie_before(const void *t, const char *pathname);
+
+/*
+ * Return the pathname for the nth entry in the trie, written into
+ * the supplied buffer (which must be large enough, i.e. at least
+ * trie_maxpathlen(t) bytes).
+ */
+void trie_getpath(const void *t, unsigned long n, char *buf);
+
+/*
+ * Return the total number of entries in the whole trie.
+ */
+unsigned long trie_count(const void *t);
+
+/*
+ * Enumerate all the trie_file records in the trie, in sequence,
+ * and return pointers to their trie_file structures. Returns NULL
+ * at end of list, naturally.
+ *
+ * Optionally returns the pathnames as well: if "buf" is non-NULL
+ * then it is expected to point to a buffer large enough to store
+ * all the pathnames in the trie (i.e. at least trie_maxpathlen(t)
+ * bytes). This buffer is also expected to remain unchanged
+ * between calls to triewalk_next(), or else the returned
+ * pathnames will be corrupted.
+ */
+typedef struct triewalk triewalk;
+triewalk *triewalk_new(const void *t);
+const struct trie_file *triewalk_next(triewalk *tw, char *buf);
+void triewalk_free(triewalk *tw);
+
+/* ----------------------------------------------------------------------
+ * The trie file also contains an index managed by index.h, so we
+ * must be able to ask about that too.
+ */
+void trie_set_index_offset(void *t, off_t ptr);
+off_t trie_get_index_offset(const void *t);