d7d9126d7494320f82ad37fa379299d41202f003
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #define _GNU_SOURCE
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <time.h>
13 #include <assert.h>
14
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <fcntl.h>
18 #include <sys/mman.h>
19 #include <termios.h>
20 #include <sys/ioctl.h>
21 #include <fnmatch.h>
22
23 #include "du.h"
24 #include "trie.h"
25 #include "index.h"
26 #include "malloc.h"
27 #include "html.h"
28 #include "httpd.h"
29 #include "fgetline.h"
30
31 #define PNAME "agedu"
32
33 #define lenof(x) (sizeof((x))/sizeof(*(x)))
34
35 /*
36 * Path separator. This global variable affects the behaviour of
37 * various parts of the code when they need to deal with path
38 * separators. The path separator appropriate to a particular data
39 * set is encoded in the index file storing that data set; data
40 * sets generated on Unix will of course have the default '/', but
41 * foreign data sets are conceivable and must be handled correctly.
42 */
43 char pathsep = '/';
44
45 void fatal(const char *fmt, ...)
46 {
47 va_list ap;
48 fprintf(stderr, "%s: ", PNAME);
49 va_start(ap, fmt);
50 vfprintf(stderr, fmt, ap);
51 va_end(ap);
52 fprintf(stderr, "\n");
53 exit(1);
54 }
55
/*
 * One --include / --exclude / --prune rule from the command line.
 * Rules are kept in command-line order and the last one that matches
 * an entry decides its fate.
 */
struct inclusion_exclusion {
    /* 1 = include, 0 = exclude (but still descend), -1 = prune */
    int type;
    /* fnmatch() pattern to test against */
    const char *wildcard;
    /* nonzero: match the whole pathname; zero: last component only */
    int path;
};
61
62 struct ctx {
63 triebuild *tb;
64 dev_t datafile_dev, filesystem_dev;
65 ino_t datafile_ino;
66 time_t last_output_update;
67 int progress, progwidth;
68 int straight_to_dump;
69 struct inclusion_exclusion *inex;
70 int ninex;
71 int crossfs;
72 };
73
74 static void dump_line(const char *pathname, const struct trie_file *tf)
75 {
76 const char *p;
77 printf("%llu %llu ", tf->size, tf->atime);
78 for (p = pathname; *p; p++) {
79 if (*p >= ' ' && *p < 127 && *p != '%')
80 putchar(*p);
81 else
82 printf("%%%02x", (unsigned char)*p);
83 }
84 putchar('\n');
85 }
86
87 static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
88 {
89 struct ctx *ctx = (struct ctx *)vctx;
90 struct trie_file file;
91 time_t t;
92 int i, include;
93 const char *filename;
94
95 /*
96 * Filter out our own data file.
97 */
98 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
99 return 0;
100
101 /*
102 * Don't cross the streams^W^Wany file system boundary.
103 */
104 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
105 return 0;
106
107 file.size = (unsigned long long)512 * st->st_blocks;
108 file.atime = st->st_atime;
109
110 /*
111 * Filter based on wildcards.
112 */
113 include = 1;
114 filename = strrchr(pathname, pathsep);
115 if (!filename)
116 filename = pathname;
117 else
118 filename++;
119 for (i = 0; i < ctx->ninex; i++) {
120 if (fnmatch(ctx->inex[i].wildcard,
121 ctx->inex[i].path ? pathname : filename, 0) == 0)
122 include = ctx->inex[i].type;
123 }
124 if (include == -1)
125 return 0; /* ignore this entry and any subdirs */
126 if (include == 0) {
127 /*
128 * Here we are supposed to be filtering an entry out, but
129 * still recursing into it if it's a directory. However,
130 * we can't actually leave out any directory whose
131 * subdirectories we then look at. So we cheat, in that
132 * case, by setting the size to zero.
133 */
134 if (!S_ISDIR(st->st_mode))
135 return 0; /* just ignore */
136 else
137 file.size = 0;
138 }
139
140 if (ctx->straight_to_dump)
141 dump_line(pathname, &file);
142 else
143 triebuild_add(ctx->tb, pathname, &file);
144
145 if (ctx->progress) {
146 t = time(NULL);
147 if (t != ctx->last_output_update) {
148 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
149 pathname);
150 fflush(stderr);
151 ctx->last_output_update = t;
152 }
153 }
154
155 return 1;
156 }
157
158 static void text_query(const void *mappedfile, const char *querydir,
159 time_t t, int depth)
160 {
161 size_t maxpathlen;
162 char *pathbuf;
163 unsigned long xi1, xi2;
164 unsigned long long s1, s2;
165
166 maxpathlen = trie_maxpathlen(mappedfile);
167 pathbuf = snewn(maxpathlen + 1, char);
168
169 /*
170 * We want to query everything between the supplied filename
171 * (inclusive) and that filename with a ^A on the end
172 * (exclusive). So find the x indices for each.
173 */
174 strcpy(pathbuf, querydir);
175 make_successor(pathbuf);
176 xi1 = trie_before(mappedfile, querydir);
177 xi2 = trie_before(mappedfile, pathbuf);
178
179 /*
180 * Now do the lookups in the age index.
181 */
182 s1 = index_query(mappedfile, xi1, t);
183 s2 = index_query(mappedfile, xi2, t);
184
185 if (s1 == s2)
186 return; /* no space taken up => no display */
187
188 /* Display in units of 1Kb */
189 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
190
191 if (depth > 0) {
192 /*
193 * Now scan for first-level subdirectories and report
194 * those too.
195 */
196 xi1++;
197 while (xi1 < xi2) {
198 trie_getpath(mappedfile, xi1, pathbuf);
199 text_query(mappedfile, pathbuf, t, depth-1);
200 make_successor(pathbuf);
201 xi1 = trie_before(mappedfile, pathbuf);
202 }
203 }
204 }
205
206 /*
207 * Largely frivolous way to define all my command-line options. I
208 * present here a parametric macro which declares a series of
209 * _logical_ option identifiers, and for each one declares zero or
210 * more short option characters and zero or more long option
211 * words. Then I repeatedly invoke that macro with its arguments
212 * defined to be various other macros, which allows me to
213 * variously:
214 *
215 * - define an enum allocating a distinct integer value to each
216 * logical option id
217 * - define a string consisting of precisely all the short option
218 * characters
219 * - define a string array consisting of all the long option
220 * strings
221 * - define (with help from auxiliary enums) integer arrays
222 * parallel to both of the above giving the logical option id
223 * for each physical short and long option
224 * - define an array indexed by logical option id indicating
225 * whether the option in question takes a value
226 * - define a function which prints out brief online help for all
227 * the options.
228 *
229 * It's not at all clear to me that this trickery is actually
230 * particularly _efficient_ - it still, after all, requires going
231 * linearly through the option list at run time and doing a
232 * strcmp, whereas in an ideal world I'd have liked the lists of
233 * long and short options to be pre-sorted so that a binary search
234 * or some other more efficient lookup was possible. (Not that
235 * asymptotic algorithmic complexity is remotely vital in option
236 * parsing, but if I were doing this in, say, Lisp or something
237 * with an equivalently powerful preprocessor then once I'd had
238 * the idea of preparing the option-parsing data structures at
239 * compile time I would probably have made the effort to prepare
240 * them _properly_. I could have Perl generate me a source file
241 * from some sort of description, I suppose, but that would seem
242 * like overkill. And in any case, it's more of a challenge to
243 * achieve as much as possible by cunning use of cpp and enum than
244 * to just write some sensible and logical code in a Turing-
245 * complete language. I said it was largely frivolous :-)
246 *
247 * This approach does have the virtue that it brings together the
248 * option ids, option spellings and help text into a single
249 * combined list and defines them all in exactly one place. If I
250 * want to add a new option, or a new spelling for an option, I
251 * only have to modify the main OPTHELP macro below and then add
252 * code to process the new logical id.
253 *
254 * (Though, really, even that isn't ideal, since it still involves
255 * modifying the source file in more than one place. In a
256 * _properly_ ideal world, I'd be able to interleave the option
257 * definitions with the code fragments that process them. And then
258 * not bother defining logical identifiers for them at all - those
259 * would be automatically generated, since I wouldn't have any
260 * need to specify them manually in another part of the code.)
261 */
262
/*
 * The master option table. Each logical option is introduced by
 * NOVAL(id) or VAL(id), followed by its SHORT() and LONG() spellings
 * and its help text; the HELP* entries only matter to usage().
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") \
        HELPLINE("agedu [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") \
        HELPOPT("scan and index a directory") \
    NOVAL(DUMP) SHORT(d) LONG(dump) \
        HELPOPT("dump the index file on stdout") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") \
        HELPOPT("scan only, generating a dump") \
    NOVAL(LOAD) SHORT(l) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") \
        HELPOPT("print a plain text report on a subdirectory") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") \
        HELPOPT("print an HTML report on a subdirectory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") \
        HELPOPT("[all modes] specify index file") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] include pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") \
        HELPOPT("[--text] include only files older than this") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") \
        HELPOPT("[--html,--web] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") \
        HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") \
        HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") \
        HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") \
        HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) \
        HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) \
        HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text")
324
325 #define IGNORE(x)
326 #define DEFENUM(x) OPT_ ## x,
327 #define ZERO(x) 0,
328 #define ONE(x) 1,
329 #define STRING(x) #x ,
330 #define STRINGNOCOMMA(x) #x
331 #define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
332 #define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
333 #define SHORTOPTVAL(x) SHORTVAL_ ## x,
334 #define SHORTTMP(x) SHORTtmp3_ ## x,
335 #define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
336 #define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
337 #define LONGOPTVAL(x) LONGVAL_ ## x,
338 #define LONGTMP(x) SHORTtmp3_ ## x,
339
340 #define OPTIONS(NOVAL, VAL, SHORT, LONG) \
341 OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)
342
343 enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
344 enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
345 enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
346 static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
347 static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
348 static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
349 enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
350 enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
351 static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
352 static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
353
354 static void usage(FILE *fp)
355 {
356 char longbuf[80];
357 const char *prefix, *shortopt, *longopt, *optarg;
358 int i, optex;
359
360 #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
361 #define HELPNOVAL(s) optex = 0;
362 #define HELPVAL(s) optex = 1;
363 #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
364 #define HELPLONG(s) if (!longopt) { \
365 strcpy(longbuf, "--" #s); longopt = longbuf; \
366 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
367 #define HELPPFX(s) prefix = s;
368 #define HELPARG(s) optarg = s;
369 #define HELPLINE(s) assert(optex == -1); \
370 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
371 HELPRESET;
372 #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
373 assert(shortopt || longopt); \
374 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
375 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
376 optarg?" ":"", optarg?optarg:""); \
377 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
378
379 HELPRESET;
380 OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
381 HELPPFX, HELPARG, HELPLINE, HELPOPT);
382
383 #undef HELPRESET
384 #undef HELPNOVAL
385 #undef HELPVAL
386 #undef HELPSHORT
387 #undef HELPLONG
388 #undef HELPPFX
389 #undef HELPARG
390 #undef HELPLINE
391 #undef HELPOPT
392 }
393
394 static time_t parse_age(time_t now, const char *agestr)
395 {
396 time_t t;
397 struct tm tm;
398 int nunits;
399 char unit[2];
400
401 t = now;
402
403 if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
404 fprintf(stderr, "%s: age specification should be a number followed by"
405 " one of d,w,m,y\n", PNAME);
406 exit(1);
407 }
408
409 if (unit[0] == 'd') {
410 t -= 86400 * nunits;
411 } else if (unit[0] == 'w') {
412 t -= 86400 * 7 * nunits;
413 } else {
414 int ym;
415
416 tm = *localtime(&t);
417 ym = tm.tm_year * 12 + tm.tm_mon;
418
419 if (unit[0] == 'm')
420 ym -= nunits;
421 else
422 ym -= 12 * nunits;
423
424 tm.tm_year = ym / 12;
425 tm.tm_mon = ym % 12;
426
427 t = mktime(&tm);
428 }
429
430 return t;
431 }
432
433 int main(int argc, char **argv)
434 {
435 int fd, count;
436 struct ctx actx, *ctx = &actx;
437 struct stat st;
438 off_t totalsize, realsize;
439 void *mappedfile;
440 triewalk *tw;
441 indexbuild *ib;
442 const struct trie_file *tf;
443 char *filename = "agedu.dat";
444 int doing_opts = 1;
445 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD };
446 struct action {
447 int mode;
448 char *arg;
449 } *actions = NULL;
450 int nactions = 0, actionsize = 0, action;
451 time_t now = time(NULL);
452 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
453 int htmlautoagerange = 1;
454 const char *httpserveraddr = NULL;
455 int httpserverport = 0;
456 const char *httpauthdata = NULL;
457 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
458 int progress = 1;
459 struct inclusion_exclusion *inex = NULL;
460 int ninex = 0, inexsize = 0;
461 int crossfs = 0;
462
463 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
464 {
465 static const char *const optnames[NOPTIONS] = {
466 OPTIONS(STRING,STRING,IGNORE,IGNORE)
467 };
468 int i;
469 for (i = 0; i < NSHORTOPTS; i++)
470 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
471 opthasval[shortvals[i]] ? "value" : "no value");
472 for (i = 0; i < NLONGOPTS; i++)
473 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
474 opthasval[longvals[i]] ? "value" : "no value");
475 }
476 #endif
477
478 while (--argc > 0) {
479 char *p = *++argv;
480
481 if (doing_opts && *p == '-') {
482 int wordstart = 1;
483
484 if (!strcmp(p, "--")) {
485 doing_opts = 0;
486 continue;
487 }
488
489 p++;
490 while (*p) {
491 int optid = -1;
492 int i;
493 char *optval;
494
495 if (wordstart && *p == '-') {
496 /*
497 * GNU-style long option.
498 */
499 p++;
500 optval = strchr(p, '=');
501 if (optval)
502 *optval++ = '\0';
503
504 for (i = 0; i < NLONGOPTS; i++) {
505 const char *opt = longopts[i], *s = p;
506 int match = 1;
507 /*
508 * The underscores in the option names
509 * defined above may be given by the user
510 * as underscores or dashes, or omitted
511 * entirely.
512 */
513 while (*opt) {
514 if (*opt == '_') {
515 if (*s == '-' || *s == '_')
516 s++;
517 } else {
518 if (*opt != *s) {
519 match = 0;
520 break;
521 }
522 s++;
523 }
524 opt++;
525 }
526 if (match && !*s) {
527 optid = longvals[i];
528 break;
529 }
530 }
531
532 if (optid < 0) {
533 fprintf(stderr, "%s: unrecognised option '--%s'\n",
534 PNAME, p);
535 return 1;
536 }
537
538 if (!opthasval[optid]) {
539 if (optval) {
540 fprintf(stderr, "%s: unexpected argument to option"
541 " '--%s'\n", PNAME, p);
542 return 1;
543 }
544 } else {
545 if (!optval) {
546 if (--argc > 0) {
547 optval = *++argv;
548 } else {
549 fprintf(stderr, "%s: option '--%s' expects"
550 " an argument\n", PNAME, p);
551 return 1;
552 }
553 }
554 }
555
556 p += strlen(p); /* finished with this argument word */
557 } else {
558 /*
559 * Short option.
560 */
561 char c = *p++;
562
563 for (i = 0; i < NSHORTOPTS; i++)
564 if (c == shortopts[i]) {
565 optid = shortvals[i];
566 break;
567 }
568
569 if (optid < 0) {
570 fprintf(stderr, "%s: unrecognised option '-%c'\n",
571 PNAME, c);
572 return 1;
573 }
574
575 if (opthasval[optid]) {
576 if (*p) {
577 optval = p;
578 p += strlen(p);
579 } else if (--argc > 0) {
580 optval = *++argv;
581 } else {
582 fprintf(stderr, "%s: option '-%c' expects"
583 " an argument\n", PNAME, c);
584 return 1;
585 }
586 } else {
587 optval = NULL;
588 }
589 }
590
591 wordstart = 0;
592
593 /*
594 * Now actually process the option.
595 */
596 switch (optid) {
597 case OPT_HELP:
598 usage(stdout);
599 return 0;
600 case OPT_VERSION:
601 printf("FIXME: version();\n");
602 return 0;
603 case OPT_LICENCE:
604 printf("FIXME: licence();\n");
605 return 0;
606 case OPT_SCAN:
607 if (nactions >= actionsize) {
608 actionsize = nactions * 3 / 2 + 16;
609 actions = sresize(actions, actionsize, struct action);
610 }
611 actions[nactions].mode = SCAN;
612 actions[nactions].arg = optval;
613 nactions++;
614 break;
615 case OPT_SCANDUMP:
616 if (nactions >= actionsize) {
617 actionsize = nactions * 3 / 2 + 16;
618 actions = sresize(actions, actionsize, struct action);
619 }
620 actions[nactions].mode = SCANDUMP;
621 actions[nactions].arg = optval;
622 nactions++;
623 break;
624 case OPT_DUMP:
625 if (nactions >= actionsize) {
626 actionsize = nactions * 3 / 2 + 16;
627 actions = sresize(actions, actionsize, struct action);
628 }
629 actions[nactions].mode = DUMP;
630 actions[nactions].arg = NULL;
631 nactions++;
632 break;
633 case OPT_LOAD:
634 if (nactions >= actionsize) {
635 actionsize = nactions * 3 / 2 + 16;
636 actions = sresize(actions, actionsize, struct action);
637 }
638 actions[nactions].mode = LOAD;
639 actions[nactions].arg = NULL;
640 nactions++;
641 break;
642 case OPT_TEXT:
643 if (nactions >= actionsize) {
644 actionsize = nactions * 3 / 2 + 16;
645 actions = sresize(actions, actionsize, struct action);
646 }
647 actions[nactions].mode = TEXT;
648 actions[nactions].arg = optval;
649 nactions++;
650 break;
651 case OPT_HTML:
652 if (nactions >= actionsize) {
653 actionsize = nactions * 3 / 2 + 16;
654 actions = sresize(actions, actionsize, struct action);
655 }
656 actions[nactions].mode = HTML;
657 actions[nactions].arg = optval;
658 nactions++;
659 break;
660 case OPT_HTTPD:
661 if (nactions >= actionsize) {
662 actionsize = nactions * 3 / 2 + 16;
663 actions = sresize(actions, actionsize, struct action);
664 }
665 actions[nactions].mode = HTTPD;
666 actions[nactions].arg = NULL;
667 nactions++;
668 break;
669 case OPT_PROGRESS:
670 progress = 2;
671 break;
672 case OPT_NOPROGRESS:
673 progress = 0;
674 break;
675 case OPT_TTYPROGRESS:
676 progress = 1;
677 break;
678 case OPT_CROSSFS:
679 crossfs = 1;
680 break;
681 case OPT_NOCROSSFS:
682 crossfs = 0;
683 break;
684 case OPT_DATAFILE:
685 filename = optval;
686 break;
687 case OPT_MINAGE:
688 textcutoff = parse_age(now, optval);
689 break;
690 case OPT_AGERANGE:
691 if (!strcmp(optval, "auto")) {
692 htmlautoagerange = 1;
693 } else {
694 char *q = optval + strcspn(optval, "-:");
695 if (*q)
696 *q++ = '\0';
697 htmloldest = parse_age(now, optval);
698 htmlnewest = *q ? parse_age(now, q) : now;
699 htmlautoagerange = 0;
700 }
701 break;
702 case OPT_SERVERADDR:
703 {
704 char *port;
705 if (optval[0] == '[' &&
706 (port = strchr(optval, ']')) != NULL)
707 port++;
708 else
709 port = optval;
710 port += strcspn(port, ":");
711 if (port)
712 *port++ = '\0';
713 httpserveraddr = optval;
714 httpserverport = atoi(port);
715 }
716 break;
717 case OPT_AUTH:
718 if (!strcmp(optval, "magic"))
719 auth = HTTPD_AUTH_MAGIC;
720 else if (!strcmp(optval, "basic"))
721 auth = HTTPD_AUTH_BASIC;
722 else if (!strcmp(optval, "none"))
723 auth = HTTPD_AUTH_NONE;
724 else if (!strcmp(optval, "default"))
725 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
726 else if (!strcmp(optval, "help") ||
727 !strcmp(optval, "list")) {
728 printf("agedu: supported HTTP authentication types"
729 " are:\n"
730 " magic use Linux /proc/net/tcp to"
731 " determine owner of peer socket\n"
732 " basic HTTP Basic username and"
733 " password authentication\n"
734 " default use 'magic' if possible, "
735 " otherwise fall back to 'basic'\n"
736 " none unauthenticated HTTP (if"
737 " the data file is non-confidential)\n");
738 return 0;
739 } else {
740 fprintf(stderr, "%s: unrecognised authentication"
741 " type '%s'\n%*s options are 'magic',"
742 " 'basic', 'none', 'default'\n",
743 PNAME, optval, (int)strlen(PNAME), "");
744 return 1;
745 }
746 break;
747 case OPT_AUTHFILE:
748 case OPT_AUTHFD:
749 {
750 int fd;
751 char namebuf[40];
752 const char *name;
753 char *authbuf;
754 int authlen, authsize;
755 int ret;
756
757 if (optid == OPT_AUTHFILE) {
758 fd = open(optval, O_RDONLY);
759 if (fd < 0) {
760 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
761 optval, strerror(errno));
762 return 1;
763 }
764 name = optval;
765 } else {
766 fd = atoi(optval);
767 name = namebuf;
768 sprintf(namebuf, "fd %d", fd);
769 }
770
771 authlen = 0;
772 authsize = 256;
773 authbuf = snewn(authsize, char);
774 while ((ret = read(fd, authbuf+authlen,
775 authsize-authlen)) > 0) {
776 authlen += ret;
777 if ((authsize - authlen) < (authsize / 16)) {
778 authsize = authlen * 3 / 2 + 4096;
779 authbuf = sresize(authbuf, authsize, char);
780 }
781 }
782 if (ret < 0) {
783 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
784 name, strerror(errno));
785 return 1;
786 }
787 if (optid == OPT_AUTHFILE)
788 close(fd);
789 httpauthdata = authbuf;
790 }
791 break;
792 case OPT_INCLUDE:
793 case OPT_INCLUDEPATH:
794 case OPT_EXCLUDE:
795 case OPT_EXCLUDEPATH:
796 case OPT_PRUNE:
797 case OPT_PRUNEPATH:
798 if (ninex >= inexsize) {
799 inexsize = ninex * 3 / 2 + 16;
800 inex = sresize(inex, inexsize,
801 struct inclusion_exclusion);
802 }
803 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
804 optid == OPT_EXCLUDEPATH ||
805 optid == OPT_PRUNEPATH);
806 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
807 optid == OPT_INCLUDEPATH ? 1 :
808 optid == OPT_EXCLUDE ? 0 :
809 optid == OPT_EXCLUDEPATH ? 0 :
810 optid == OPT_PRUNE ? -1 :
811 /* optid == OPT_PRUNEPATH ? */ -1);
812 inex[ninex].wildcard = optval;
813 ninex++;
814 break;
815 }
816 }
817 } else {
818 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
819 return 1;
820 }
821 }
822
823 if (nactions == 0) {
824 usage(stderr);
825 return 1;
826 }
827
828 for (action = 0; action < nactions; action++) {
829 int mode = actions[action].mode;
830
831 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
832 const char *scandir = actions[action].arg;
833 if (mode == LOAD) {
834 char *buf = fgetline(stdin);
835 unsigned newpathsep;
836 buf[strcspn(buf, "\r\n")] = '\0';
837 if (1 != sscanf(buf, "agedu dump file. pathsep=%x",
838 &newpathsep)) {
839 fprintf(stderr, "%s: header in dump file not recognised\n",
840 PNAME);
841 return 1;
842 }
843 pathsep = (char)newpathsep;
844 sfree(buf);
845 }
846
847 if (mode == SCAN || mode == LOAD) {
848 /*
849 * Prepare to write out the index file.
850 */
851 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
852 if (fd < 0) {
853 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
854 strerror(errno));
855 return 1;
856 }
857 if (fstat(fd, &st) < 0) {
858 perror("agedu: fstat");
859 return 1;
860 }
861 ctx->datafile_dev = st.st_dev;
862 ctx->datafile_ino = st.st_ino;
863 ctx->straight_to_dump = 0;
864 } else {
865 ctx->datafile_dev = -1;
866 ctx->datafile_ino = -1;
867 ctx->straight_to_dump = 1;
868 }
869
870 if (mode == SCAN || mode == SCANDUMP) {
871 if (stat(scandir, &st) < 0) {
872 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
873 strerror(errno));
874 return 1;
875 }
876 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
877 }
878
879 ctx->inex = inex;
880 ctx->ninex = ninex;
881 ctx->crossfs = crossfs;
882
883 ctx->last_output_update = time(NULL);
884
885 /* progress==1 means report progress only if stderr is a tty */
886 if (progress == 1)
887 progress = isatty(2) ? 2 : 0;
888 ctx->progress = progress;
889 {
890 struct winsize ws;
891 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
892 ctx->progwidth = ws.ws_col - 1;
893 else
894 ctx->progwidth = 79;
895 }
896
897 if (mode == SCANDUMP)
898 printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep);
899
900 /*
901 * Scan the directory tree, and write out the trie component
902 * of the data file.
903 */
904 if (mode != SCANDUMP) {
905 ctx->tb = triebuild_new(fd);
906 }
907 if (mode == LOAD) {
908 char *buf;
909 int line = 2;
910 while ((buf = fgetline(stdin)) != NULL) {
911 struct trie_file tf;
912 char *p, *q;
913
914 buf[strcspn(buf, "\r\n")] = '\0';
915
916 p = buf;
917 q = p;
918 while (*p && *p != ' ') p++;
919 if (!*p) {
920 fprintf(stderr, "%s: dump file line %d: expected at least"
921 " three fields\n", PNAME, line);
922 return 1;
923 }
924 *p++ = '\0';
925 tf.size = strtoull(q, NULL, 10);
926 q = p;
927 while (*p && *p != ' ') p++;
928 if (!*p) {
929 fprintf(stderr, "%s: dump file line %d: expected at least"
930 " three fields\n", PNAME, line);
931 return 1;
932 }
933 *p++ = '\0';
934 tf.atime = strtoull(q, NULL, 10);
935 q = buf;
936 while (*p) {
937 int c = *p;
938 if (*p == '%') {
939 int i;
940 p++;
941 c = 0;
942 for (i = 0; i < 2; i++) {
943 if (*p >= '0' && *p <= '9')
944 c += *p - '0';
945 else if (*p >= 'A' && *p <= 'F')
946 c += *p - ('A' - 10);
947 else if (*p >= 'a' && *p <= 'f')
948 c += *p - ('a' - 10);
949 else {
950 fprintf(stderr, "%s: dump file line %d: unable"
951 " to parse hex escape\n", PNAME, line);
952 }
953 p++;
954 }
955 }
956 *q++ = c;
957 p++;
958 }
959 *q = '\0';
960 triebuild_add(ctx->tb, buf, &tf);
961 sfree(buf);
962 }
963 } else {
964 du(scandir, gotdata, ctx);
965 }
966 if (mode != SCANDUMP) {
967 count = triebuild_finish(ctx->tb);
968 triebuild_free(ctx->tb);
969
970 if (ctx->progress) {
971 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
972 fflush(stderr);
973 }
974
975 /*
976 * Work out how much space the cumulative index trees
977 * will take; enlarge the file, and memory-map it.
978 */
979 if (fstat(fd, &st) < 0) {
980 perror("agedu: fstat");
981 return 1;
982 }
983
984 printf("Built pathname index, %d entries, %ju bytes\n", count,
985 (intmax_t)st.st_size);
986
987 totalsize = index_compute_size(st.st_size, count);
988
989 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
990 perror("agedu: lseek");
991 return 1;
992 }
993 if (write(fd, "\0", 1) < 1) {
994 perror("agedu: write");
995 return 1;
996 }
997
998 printf("Upper bound on index file size = %ju bytes\n",
999 (intmax_t)totalsize);
1000
1001 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1002 if (!mappedfile) {
1003 perror("agedu: mmap");
1004 return 1;
1005 }
1006
1007 ib = indexbuild_new(mappedfile, st.st_size, count);
1008 tw = triewalk_new(mappedfile);
1009 while ((tf = triewalk_next(tw, NULL)) != NULL)
1010 indexbuild_add(ib, tf);
1011 triewalk_free(tw);
1012 realsize = indexbuild_realsize(ib);
1013 indexbuild_free(ib);
1014
1015 munmap(mappedfile, totalsize);
1016 ftruncate(fd, realsize);
1017 close(fd);
1018 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
1019 }
1020 } else if (mode == TEXT) {
1021 char *querydir = actions[action].arg;
1022 size_t pathlen;
1023
1024 fd = open(filename, O_RDONLY);
1025 if (fd < 0) {
1026 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1027 strerror(errno));
1028 return 1;
1029 }
1030 if (fstat(fd, &st) < 0) {
1031 perror("agedu: fstat");
1032 return 1;
1033 }
1034 totalsize = st.st_size;
1035 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1036 if (!mappedfile) {
1037 perror("agedu: mmap");
1038 return 1;
1039 }
1040 pathsep = trie_pathsep(mappedfile);
1041
1042 /*
1043 * Trim trailing slash, just in case.
1044 */
1045 pathlen = strlen(querydir);
1046 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1047 querydir[--pathlen] = '\0';
1048
1049 text_query(mappedfile, querydir, textcutoff, 1);
1050 } else if (mode == HTML) {
1051 char *querydir = actions[action].arg;
1052 size_t pathlen;
1053 struct html_config cfg;
1054 unsigned long xi;
1055 char *html;
1056
1057 fd = open(filename, O_RDONLY);
1058 if (fd < 0) {
1059 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1060 strerror(errno));
1061 return 1;
1062 }
1063 if (fstat(fd, &st) < 0) {
1064 perror("agedu: fstat");
1065 return 1;
1066 }
1067 totalsize = st.st_size;
1068 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1069 if (!mappedfile) {
1070 perror("agedu: mmap");
1071 return 1;
1072 }
1073 pathsep = trie_pathsep(mappedfile);
1074
1075 /*
1076 * Trim trailing slash, just in case.
1077 */
1078 pathlen = strlen(querydir);
1079 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1080 querydir[--pathlen] = '\0';
1081
1082 xi = trie_before(mappedfile, querydir);
1083 cfg.format = NULL;
1084 cfg.autoage = htmlautoagerange;
1085 cfg.oldest = htmloldest;
1086 cfg.newest = htmlnewest;
1087 html = html_query(mappedfile, xi, &cfg);
1088 fputs(html, stdout);
1089 } else if (mode == DUMP) {
1090 size_t maxpathlen;
1091 char *buf;
1092
1093 fd = open(filename, O_RDONLY);
1094 if (fd < 0) {
1095 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1096 strerror(errno));
1097 return 1;
1098 }
1099 if (fstat(fd, &st) < 0) {
1100 perror("agedu: fstat");
1101 return 1;
1102 }
1103 totalsize = st.st_size;
1104 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1105 if (!mappedfile) {
1106 perror("agedu: mmap");
1107 return 1;
1108 }
1109 pathsep = trie_pathsep(mappedfile);
1110
1111 maxpathlen = trie_maxpathlen(mappedfile);
1112 buf = snewn(maxpathlen, char);
1113
1114 printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep);
1115 tw = triewalk_new(mappedfile);
1116 while ((tf = triewalk_next(tw, buf)) != NULL)
1117 dump_line(buf, tf);
1118 triewalk_free(tw);
1119 } else if (mode == HTTPD) {
1120 struct html_config pcfg;
1121 struct httpd_config dcfg;
1122
1123 fd = open(filename, O_RDONLY);
1124 if (fd < 0) {
1125 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1126 strerror(errno));
1127 return 1;
1128 }
1129 if (fstat(fd, &st) < 0) {
1130 perror("agedu: fstat");
1131 return 1;
1132 }
1133 totalsize = st.st_size;
1134 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1135 if (!mappedfile) {
1136 perror("agedu: mmap");
1137 return 1;
1138 }
1139 pathsep = trie_pathsep(mappedfile);
1140
1141 dcfg.address = httpserveraddr;
1142 dcfg.port = httpserverport;
1143 dcfg.basicauthdata = httpauthdata;
1144 pcfg.format = NULL;
1145 pcfg.autoage = htmlautoagerange;
1146 pcfg.oldest = htmloldest;
1147 pcfg.newest = htmlnewest;
1148 run_httpd(mappedfile, auth, &dcfg, &pcfg);
1149 }
1150 }
1151
1152 return 0;
1153 }