/*
 * Listing provenance: [sgt/agedu] / agedu.c
 * Object id 246f0020c82e8bdf92b27aa900904c1a97c1acf9
 */
1 /*
2 * Main program for agedu.
3 */
4
5 #define _GNU_SOURCE
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <time.h>
13 #include <assert.h>
14
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <fcntl.h>
18 #include <sys/mman.h>
19 #include <termios.h>
20 #include <sys/ioctl.h>
21 #include <fnmatch.h>
22
23 #include "du.h"
24 #include "trie.h"
25 #include "index.h"
26 #include "malloc.h"
27 #include "html.h"
28 #include "httpd.h"
29 #include "fgetline.h"
30
/* Program name, used to prefix diagnostic messages. */
#define PNAME "agedu"

/* Element count of a true array (meaningless when applied to a pointer). */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Path separator character.
 *
 * Code that has to split or join pathnames consults this global
 * rather than hard-coding '/'. Each index file records the separator
 * that applies to the data set it holds: anything generated on Unix
 * uses the default below, but a data set from a foreign system may
 * use something else and must still be handled correctly.
 */
char pathsep = '/';
44
45 void fatal(const char *fmt, ...)
46 {
47 va_list ap;
48 fprintf(stderr, "%s: ", PNAME);
49 va_start(ap, fmt);
50 vfprintf(stderr, fmt, ap);
51 va_end(ap);
52 fprintf(stderr, "\n");
53 exit(1);
54 }
55
/*
 * One wildcard filtering rule collected from the command line
 * (--include, --exclude, --prune and their *-path variants).
 */
struct inclusion_exclusion {
    /* Verdict applied on a match: 1 = include, 0 = exclude, -1 = prune. */
    int type;
    /* fnmatch() pattern; points at the option's argument string. */
    const char *wildcard;
    /* Nonzero: match against the whole pathname; zero: last component only. */
    int path;
};
61
62 struct ctx {
63 triebuild *tb;
64 dev_t datafile_dev, filesystem_dev;
65 ino_t datafile_ino;
66 time_t last_output_update;
67 int progress, progwidth;
68 int straight_to_dump;
69 struct inclusion_exclusion *inex;
70 int ninex;
71 int crossfs;
72 };
73
74 static void dump_line(const char *pathname, const struct trie_file *tf)
75 {
76 const char *p;
77 printf("%llu %llu ", tf->size, tf->atime);
78 for (p = pathname; *p; p++) {
79 if (*p >= ' ' && *p < 127 && *p != '%')
80 putchar(*p);
81 else
82 printf("%%%02x", (unsigned char)*p);
83 }
84 putchar('\n');
85 }
86
87 static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
88 {
89 struct ctx *ctx = (struct ctx *)vctx;
90 struct trie_file file;
91 time_t t;
92 int i, include;
93 const char *filename;
94
95 /*
96 * Filter out our own data file.
97 */
98 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
99 return 0;
100
101 /*
102 * Don't cross the streams^W^Wany file system boundary.
103 */
104 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
105 return 0;
106
107 file.size = (unsigned long long)512 * st->st_blocks;
108 file.atime = st->st_atime;
109
110 /*
111 * Filter based on wildcards.
112 */
113 include = 1;
114 filename = strrchr(pathname, pathsep);
115 if (!filename)
116 filename = pathname;
117 else
118 filename++;
119 for (i = 0; i < ctx->ninex; i++) {
120 if (fnmatch(ctx->inex[i].wildcard,
121 ctx->inex[i].path ? pathname : filename, 0) == 0)
122 include = ctx->inex[i].type;
123 }
124 if (include == -1)
125 return 0; /* ignore this entry and any subdirs */
126 if (include == 0) {
127 /*
128 * Here we are supposed to be filtering an entry out, but
129 * still recursing into it if it's a directory. However,
130 * we can't actually leave out any directory whose
131 * subdirectories we then look at. So we cheat, in that
132 * case, by setting the size to zero.
133 */
134 if (!S_ISDIR(st->st_mode))
135 return 0; /* just ignore */
136 else
137 file.size = 0;
138 }
139
140 if (ctx->straight_to_dump)
141 dump_line(pathname, &file);
142 else
143 triebuild_add(ctx->tb, pathname, &file);
144
145 if (ctx->progress) {
146 t = time(NULL);
147 if (t != ctx->last_output_update) {
148 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
149 pathname);
150 fflush(stderr);
151 ctx->last_output_update = t;
152 }
153 }
154
155 return 1;
156 }
157
158 static void text_query(const void *mappedfile, const char *querydir,
159 time_t t, int depth)
160 {
161 size_t maxpathlen;
162 char *pathbuf;
163 unsigned long xi1, xi2;
164 unsigned long long s1, s2;
165
166 maxpathlen = trie_maxpathlen(mappedfile);
167 pathbuf = snewn(maxpathlen + 1, char);
168
169 /*
170 * We want to query everything between the supplied filename
171 * (inclusive) and that filename with a ^A on the end
172 * (exclusive). So find the x indices for each.
173 */
174 sprintf(pathbuf, "%s\001", querydir);
175 xi1 = trie_before(mappedfile, querydir);
176 xi2 = trie_before(mappedfile, pathbuf);
177
178 /*
179 * Now do the lookups in the age index.
180 */
181 s1 = index_query(mappedfile, xi1, t);
182 s2 = index_query(mappedfile, xi2, t);
183
184 if (s1 == s2)
185 return; /* no space taken up => no display */
186
187 /* Display in units of 1Kb */
188 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
189
190 if (depth > 0) {
191 /*
192 * Now scan for first-level subdirectories and report
193 * those too.
194 */
195 xi1++;
196 while (xi1 < xi2) {
197 trie_getpath(mappedfile, xi1, pathbuf);
198 text_query(mappedfile, pathbuf, t, depth-1);
199 strcat(pathbuf, "\001");
200 xi1 = trie_before(mappedfile, pathbuf);
201 }
202 }
203 }
204
205 /*
206 * Largely frivolous way to define all my command-line options. I
207 * present here a parametric macro which declares a series of
208 * _logical_ option identifiers, and for each one declares zero or
209 * more short option characters and zero or more long option
210 * words. Then I repeatedly invoke that macro with its arguments
211 * defined to be various other macros, which allows me to
212 * variously:
213 *
214 * - define an enum allocating a distinct integer value to each
215 * logical option id
216 * - define a string consisting of precisely all the short option
217 * characters
218 * - define a string array consisting of all the long option
219 * strings
220 * - define (with help from auxiliary enums) integer arrays
221 * parallel to both of the above giving the logical option id
222 * for each physical short and long option
223 * - define an array indexed by logical option id indicating
224 * whether the option in question takes a value
225 * - define a function which prints out brief online help for all
226 * the options.
227 *
228 * It's not at all clear to me that this trickery is actually
229 * particularly _efficient_ - it still, after all, requires going
230 * linearly through the option list at run time and doing a
231 * strcmp, whereas in an ideal world I'd have liked the lists of
232 * long and short options to be pre-sorted so that a binary search
233 * or some other more efficient lookup was possible. (Not that
234 * asymptotic algorithmic complexity is remotely vital in option
235 * parsing, but if I were doing this in, say, Lisp or something
236 * with an equivalently powerful preprocessor then once I'd had
237 * the idea of preparing the option-parsing data structures at
238 * compile time I would probably have made the effort to prepare
239 * them _properly_. I could have Perl generate me a source file
240 * from some sort of description, I suppose, but that would seem
241 * like overkill. And in any case, it's more of a challenge to
242 * achieve as much as possible by cunning use of cpp and enum than
243 * to just write some sensible and logical code in a Turing-
244 * complete language. I said it was largely frivolous :-)
245 *
246 * This approach does have the virtue that it brings together the
247 * option ids, option spellings and help text into a single
248 * combined list and defines them all in exactly one place. If I
249 * want to add a new option, or a new spelling for an option, I
250 * only have to modify the main OPTHELP macro below and then add
251 * code to process the new logical id.
252 *
253 * (Though, really, even that isn't ideal, since it still involves
254 * modifying the source file in more than one place. In a
255 * _properly_ ideal world, I'd be able to interleave the option
256 * definitions with the code fragments that process them. And then
257 * not bother defining logical identifiers for them at all - those
258 * would be automatically generated, since I wouldn't have any
259 * need to specify them manually in another part of the code.)
260 */
261
/*
 * Master option list. Each parameter is the name of a one-argument
 * macro that this list invokes:
 *
 *   NOVAL(id) / VAL(id)  start a logical option that takes no value /
 *                        takes a value
 *   SHORT(c) / LONG(w)   one short / long spelling of the current option
 *   HELPPFX(str)         section heading for the help text
 *   HELPARG(str)         placeholder name for the option's value
 *   HELPLINE(str)        free-standing line of help text
 *   HELPOPT(str)         description that completes an option's entry
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE("agedu [options] action") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(DUMP) SHORT(d) LONG(dump) \
        HELPOPT("dump the index file on stdout") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    NOVAL(LOAD) SHORT(l) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") \
        HELPOPT("print a plain text report on a subdirectory") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[all modes] specify index file") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] include pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") \
        HELPOPT("[--text] include only files older than this") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") \
        HELPOPT("[--html,--web] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") \
        HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") \
        HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") \
        HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") \
        HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) \
        HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) \
        HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text")

324 #define IGNORE(x)
325 #define DEFENUM(x) OPT_ ## x,
326 #define ZERO(x) 0,
327 #define ONE(x) 1,
328 #define STRING(x) #x ,
329 #define STRINGNOCOMMA(x) #x
330 #define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
331 #define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
332 #define SHORTOPTVAL(x) SHORTVAL_ ## x,
333 #define SHORTTMP(x) SHORTtmp3_ ## x,
334 #define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
335 #define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
336 #define LONGOPTVAL(x) LONGVAL_ ## x,
337 #define LONGTMP(x) SHORTtmp3_ ## x,
338
339 #define OPTIONS(NOVAL, VAL, SHORT, LONG) \
340 OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)
341
342 enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
343 enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
344 enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
345 static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
346 static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
347 static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
348 enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
349 enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
350 static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
351 static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
352
353 static void usage(FILE *fp)
354 {
355 char longbuf[80];
356 const char *prefix, *shortopt, *longopt, *optarg;
357 int i, optex;
358
359 #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
360 #define HELPNOVAL(s) optex = 0;
361 #define HELPVAL(s) optex = 1;
362 #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
363 #define HELPLONG(s) if (!longopt) { \
364 strcpy(longbuf, "--" #s); longopt = longbuf; \
365 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
366 #define HELPPFX(s) prefix = s;
367 #define HELPARG(s) optarg = s;
368 #define HELPLINE(s) assert(optex == -1); \
369 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
370 HELPRESET;
371 #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
372 assert(shortopt || longopt); \
373 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
374 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
375 optarg?" ":"", optarg?optarg:""); \
376 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
377
378 HELPRESET;
379 OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
380 HELPPFX, HELPARG, HELPLINE, HELPOPT);
381
382 #undef HELPRESET
383 #undef HELPNOVAL
384 #undef HELPVAL
385 #undef HELPSHORT
386 #undef HELPLONG
387 #undef HELPPFX
388 #undef HELPARG
389 #undef HELPLINE
390 #undef HELPOPT
391 }
392
393 static time_t parse_age(time_t now, const char *agestr)
394 {
395 time_t t;
396 struct tm tm;
397 int nunits;
398 char unit[2];
399
400 t = now;
401
402 if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
403 fprintf(stderr, "%s: age specification should be a number followed by"
404 " one of d,w,m,y\n", PNAME);
405 exit(1);
406 }
407
408 if (unit[0] == 'd') {
409 t -= 86400 * nunits;
410 } else if (unit[0] == 'w') {
411 t -= 86400 * 7 * nunits;
412 } else {
413 int ym;
414
415 tm = *localtime(&t);
416 ym = tm.tm_year * 12 + tm.tm_mon;
417
418 if (unit[0] == 'm')
419 ym -= nunits;
420 else
421 ym -= 12 * nunits;
422
423 tm.tm_year = ym / 12;
424 tm.tm_mon = ym % 12;
425
426 t = mktime(&tm);
427 }
428
429 return t;
430 }
431
432 int main(int argc, char **argv)
433 {
434 int fd, count;
435 struct ctx actx, *ctx = &actx;
436 struct stat st;
437 off_t totalsize, realsize;
438 void *mappedfile;
439 triewalk *tw;
440 indexbuild *ib;
441 const struct trie_file *tf;
442 char *filename = "agedu.dat";
443 char *scandir = NULL;
444 char *querydir = NULL;
445 int doing_opts = 1;
446 enum { USAGE, TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD } mode = USAGE;
447 time_t now = time(NULL);
448 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
449 int htmlautoagerange = 1;
450 const char *httpserveraddr = NULL;
451 int httpserverport = 0;
452 const char *httpauthdata = NULL;
453 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
454 int progress = 1;
455 struct inclusion_exclusion *inex = NULL;
456 int ninex = 0, inexsize = 0;
457 int crossfs = 0;
458
459 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
460 {
461 static const char *const optnames[NOPTIONS] = {
462 OPTIONS(STRING,STRING,IGNORE,IGNORE)
463 };
464 int i;
465 for (i = 0; i < NSHORTOPTS; i++)
466 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
467 opthasval[shortvals[i]] ? "value" : "no value");
468 for (i = 0; i < NLONGOPTS; i++)
469 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
470 opthasval[longvals[i]] ? "value" : "no value");
471 }
472 #endif
473
474 while (--argc > 0) {
475 char *p = *++argv;
476
477 if (doing_opts && *p == '-') {
478 int wordstart = 1;
479
480 if (!strcmp(p, "--")) {
481 doing_opts = 0;
482 continue;
483 }
484
485 p++;
486 while (*p) {
487 int optid = -1;
488 int i;
489 char *optval;
490
491 if (wordstart && *p == '-') {
492 /*
493 * GNU-style long option.
494 */
495 p++;
496 optval = strchr(p, '=');
497 if (optval)
498 *optval++ = '\0';
499
500 for (i = 0; i < NLONGOPTS; i++) {
501 const char *opt = longopts[i], *s = p;
502 int match = 1;
503 /*
504 * The underscores in the option names
505 * defined above may be given by the user
506 * as underscores or dashes, or omitted
507 * entirely.
508 */
509 while (*opt) {
510 if (*opt == '_') {
511 if (*s == '-' || *s == '_')
512 s++;
513 } else {
514 if (*opt != *s) {
515 match = 0;
516 break;
517 }
518 s++;
519 }
520 opt++;
521 }
522 if (match && !*s) {
523 optid = longvals[i];
524 break;
525 }
526 }
527
528 if (optid < 0) {
529 fprintf(stderr, "%s: unrecognised option '--%s'\n",
530 PNAME, p);
531 return 1;
532 }
533
534 if (!opthasval[optid]) {
535 if (optval) {
536 fprintf(stderr, "%s: unexpected argument to option"
537 " '--%s'\n", PNAME, p);
538 return 1;
539 }
540 } else {
541 if (!optval) {
542 if (--argc > 0) {
543 optval = *++argv;
544 } else {
545 fprintf(stderr, "%s: option '--%s' expects"
546 " an argument\n", PNAME, p);
547 return 1;
548 }
549 }
550 }
551
552 p += strlen(p); /* finished with this argument word */
553 } else {
554 /*
555 * Short option.
556 */
557 char c = *p++;
558
559 for (i = 0; i < NSHORTOPTS; i++)
560 if (c == shortopts[i]) {
561 optid = shortvals[i];
562 break;
563 }
564
565 if (optid < 0) {
566 fprintf(stderr, "%s: unrecognised option '-%c'\n",
567 PNAME, c);
568 return 1;
569 }
570
571 if (opthasval[optid]) {
572 if (*p) {
573 optval = p;
574 p += strlen(p);
575 } else if (--argc > 0) {
576 optval = *++argv;
577 } else {
578 fprintf(stderr, "%s: option '-%c' expects"
579 " an argument\n", PNAME, c);
580 return 1;
581 }
582 } else {
583 optval = NULL;
584 }
585 }
586
587 wordstart = 0;
588
589 /*
590 * Now actually process the option.
591 */
592 switch (optid) {
593 case OPT_HELP:
594 usage(stdout);
595 return 0;
596 case OPT_VERSION:
597 printf("FIXME: version();\n");
598 return 0;
599 case OPT_LICENCE:
600 printf("FIXME: licence();\n");
601 return 0;
602 case OPT_SCAN:
603 mode = SCAN;
604 scandir = optval;
605 break;
606 case OPT_SCANDUMP:
607 mode = SCANDUMP;
608 scandir = optval;
609 break;
610 case OPT_DUMP:
611 mode = DUMP;
612 break;
613 case OPT_LOAD:
614 mode = LOAD;
615 break;
616 case OPT_TEXT:
617 querydir = optval;
618 mode = TEXT;
619 break;
620 case OPT_HTML:
621 mode = HTML;
622 querydir = optval;
623 break;
624 case OPT_HTTPD:
625 mode = HTTPD;
626 break;
627 case OPT_PROGRESS:
628 progress = 2;
629 break;
630 case OPT_NOPROGRESS:
631 progress = 0;
632 break;
633 case OPT_TTYPROGRESS:
634 progress = 1;
635 break;
636 case OPT_CROSSFS:
637 crossfs = 1;
638 break;
639 case OPT_NOCROSSFS:
640 crossfs = 0;
641 break;
642 case OPT_DATAFILE:
643 filename = optval;
644 break;
645 case OPT_MINAGE:
646 textcutoff = parse_age(now, optval);
647 break;
648 case OPT_AGERANGE:
649 if (!strcmp(optval, "auto")) {
650 htmlautoagerange = 1;
651 } else {
652 char *q = optval + strcspn(optval, "-:");
653 if (*q)
654 *q++ = '\0';
655 htmloldest = parse_age(now, optval);
656 htmlnewest = *q ? parse_age(now, q) : now;
657 htmlautoagerange = 0;
658 }
659 break;
660 case OPT_SERVERADDR:
661 {
662 char *port;
663 if (optval[0] == '[' &&
664 (port = strchr(optval, ']')) != NULL)
665 port++;
666 else
667 port = optval;
668 port += strcspn(port, ":");
669 if (port)
670 *port++ = '\0';
671 httpserveraddr = optval;
672 httpserverport = atoi(port);
673 }
674 break;
675 case OPT_AUTH:
676 if (!strcmp(optval, "magic"))
677 auth = HTTPD_AUTH_MAGIC;
678 else if (!strcmp(optval, "basic"))
679 auth = HTTPD_AUTH_BASIC;
680 else if (!strcmp(optval, "none"))
681 auth = HTTPD_AUTH_NONE;
682 else if (!strcmp(optval, "default"))
683 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
684 else if (!strcmp(optval, "help") ||
685 !strcmp(optval, "list")) {
686 printf("agedu: supported HTTP authentication types"
687 " are:\n"
688 " magic use Linux /proc/net/tcp to"
689 " determine owner of peer socket\n"
690 " basic HTTP Basic username and"
691 " password authentication\n"
692 " default use 'magic' if possible, "
693 " otherwise fall back to 'basic'\n"
694 " none unauthenticated HTTP (if"
695 " the data file is non-confidential)\n");
696 return 0;
697 } else {
698 fprintf(stderr, "%s: unrecognised authentication"
699 " type '%s'\n%*s options are 'magic',"
700 " 'basic', 'none', 'default'\n",
701 PNAME, optval, (int)strlen(PNAME), "");
702 return 1;
703 }
704 break;
705 case OPT_AUTHFILE:
706 case OPT_AUTHFD:
707 {
708 int fd;
709 char namebuf[40];
710 const char *name;
711 char *authbuf;
712 int authlen, authsize;
713 int ret;
714
715 if (optid == OPT_AUTHFILE) {
716 fd = open(optval, O_RDONLY);
717 if (fd < 0) {
718 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
719 optval, strerror(errno));
720 return 1;
721 }
722 name = optval;
723 } else {
724 fd = atoi(optval);
725 name = namebuf;
726 sprintf(namebuf, "fd %d", fd);
727 }
728
729 authlen = 0;
730 authsize = 256;
731 authbuf = snewn(authsize, char);
732 while ((ret = read(fd, authbuf+authlen,
733 authsize-authlen)) > 0) {
734 authlen += ret;
735 if ((authsize - authlen) < (authsize / 16)) {
736 authsize = authlen * 3 / 2 + 4096;
737 authbuf = sresize(authbuf, authsize, char);
738 }
739 }
740 if (ret < 0) {
741 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
742 name, strerror(errno));
743 return 1;
744 }
745 if (optid == OPT_AUTHFILE)
746 close(fd);
747 httpauthdata = authbuf;
748 }
749 break;
750 case OPT_INCLUDE:
751 case OPT_INCLUDEPATH:
752 case OPT_EXCLUDE:
753 case OPT_EXCLUDEPATH:
754 case OPT_PRUNE:
755 case OPT_PRUNEPATH:
756 if (ninex >= inexsize) {
757 inexsize = ninex * 3 / 2 + 16;
758 inex = sresize(inex, inexsize,
759 struct inclusion_exclusion);
760 }
761 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
762 optid == OPT_EXCLUDEPATH ||
763 optid == OPT_PRUNEPATH);
764 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
765 optid == OPT_INCLUDEPATH ? 1 :
766 optid == OPT_EXCLUDE ? 0 :
767 optid == OPT_EXCLUDEPATH ? 0 :
768 optid == OPT_PRUNE ? -1 :
769 /* optid == OPT_PRUNEPATH ? */ -1);
770 inex[ninex].wildcard = optval;
771 ninex++;
772 break;
773 }
774 }
775 } else {
776 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
777 return 1;
778 }
779 }
780
781 if (mode == USAGE) {
782 usage(stderr);
783 return 1;
784 } else if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
785
786 if (mode == LOAD) {
787 char *buf = fgetline(stdin);
788 unsigned newpathsep;
789 buf[strcspn(buf, "\r\n")] = '\0';
790 if (1 != sscanf(buf, "agedu dump file. pathsep=%x",
791 &newpathsep)) {
792 fprintf(stderr, "%s: header in dump file not recognised\n",
793 PNAME);
794 return 1;
795 }
796 pathsep = (char)newpathsep;
797 sfree(buf);
798 }
799
800 if (mode == SCAN || mode == LOAD) {
801 /*
802 * Prepare to write out the index file.
803 */
804 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
805 if (fd < 0) {
806 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
807 strerror(errno));
808 return 1;
809 }
810 if (fstat(fd, &st) < 0) {
811 perror("agedu: fstat");
812 return 1;
813 }
814 ctx->datafile_dev = st.st_dev;
815 ctx->datafile_ino = st.st_ino;
816 ctx->straight_to_dump = 0;
817 } else {
818 ctx->datafile_dev = -1;
819 ctx->datafile_ino = -1;
820 ctx->straight_to_dump = 1;
821 }
822
823 if (mode == SCAN || mode == SCANDUMP) {
824 if (stat(scandir, &st) < 0) {
825 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
826 strerror(errno));
827 return 1;
828 }
829 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
830 }
831
832 ctx->inex = inex;
833 ctx->ninex = ninex;
834 ctx->crossfs = crossfs;
835
836 ctx->last_output_update = time(NULL);
837
838 /* progress==1 means report progress only if stderr is a tty */
839 if (progress == 1)
840 progress = isatty(2) ? 2 : 0;
841 ctx->progress = progress;
842 {
843 struct winsize ws;
844 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
845 ctx->progwidth = ws.ws_col - 1;
846 else
847 ctx->progwidth = 79;
848 }
849
850 if (mode == SCANDUMP)
851 printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep);
852
853 /*
854 * Scan the directory tree, and write out the trie component
855 * of the data file.
856 */
857 if (mode != SCANDUMP) {
858 ctx->tb = triebuild_new(fd);
859 }
860 if (mode == LOAD) {
861 char *buf;
862 int line = 2;
863 while ((buf = fgetline(stdin)) != NULL) {
864 struct trie_file tf;
865 char *p, *q;
866
867 buf[strcspn(buf, "\r\n")] = '\0';
868
869 p = buf;
870 q = p;
871 while (*p && *p != ' ') p++;
872 if (!*p) {
873 fprintf(stderr, "%s: dump file line %d: expected at least"
874 " three fields\n", PNAME, line);
875 return 1;
876 }
877 *p++ = '\0';
878 tf.size = strtoull(q, NULL, 10);
879 q = p;
880 while (*p && *p != ' ') p++;
881 if (!*p) {
882 fprintf(stderr, "%s: dump file line %d: expected at least"
883 " three fields\n", PNAME, line);
884 return 1;
885 }
886 *p++ = '\0';
887 tf.atime = strtoull(q, NULL, 10);
888 q = buf;
889 while (*p) {
890 int c = *p;
891 if (*p == '%') {
892 int i;
893 p++;
894 c = 0;
895 for (i = 0; i < 2; i++) {
896 if (*p >= '0' && *p <= '9')
897 c += *p - '0';
898 else if (*p >= 'A' && *p <= 'F')
899 c += *p - ('A' - 10);
900 else if (*p >= 'a' && *p <= 'f')
901 c += *p - ('a' - 10);
902 else {
903 fprintf(stderr, "%s: dump file line %d: unable"
904 " to parse hex escape\n", PNAME, line);
905 }
906 p++;
907 }
908 }
909 *q++ = c;
910 p++;
911 }
912 *q = '\0';
913 triebuild_add(ctx->tb, buf, &tf);
914 sfree(buf);
915 }
916 } else {
917 du(scandir, gotdata, ctx);
918 }
919 if (mode != SCANDUMP) {
920 count = triebuild_finish(ctx->tb);
921 triebuild_free(ctx->tb);
922
923 if (ctx->progress) {
924 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
925 fflush(stderr);
926 }
927
928 /*
929 * Work out how much space the cumulative index trees
930 * will take; enlarge the file, and memory-map it.
931 */
932 if (fstat(fd, &st) < 0) {
933 perror("agedu: fstat");
934 return 1;
935 }
936
937 printf("Built pathname index, %d entries, %ju bytes\n", count,
938 (intmax_t)st.st_size);
939
940 totalsize = index_compute_size(st.st_size, count);
941
942 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
943 perror("agedu: lseek");
944 return 1;
945 }
946 if (write(fd, "\0", 1) < 1) {
947 perror("agedu: write");
948 return 1;
949 }
950
951 printf("Upper bound on index file size = %ju bytes\n",
952 (intmax_t)totalsize);
953
954 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
955 if (!mappedfile) {
956 perror("agedu: mmap");
957 return 1;
958 }
959
960 ib = indexbuild_new(mappedfile, st.st_size, count);
961 tw = triewalk_new(mappedfile);
962 while ((tf = triewalk_next(tw, NULL)) != NULL)
963 indexbuild_add(ib, tf);
964 triewalk_free(tw);
965 realsize = indexbuild_realsize(ib);
966 indexbuild_free(ib);
967
968 munmap(mappedfile, totalsize);
969 ftruncate(fd, realsize);
970 close(fd);
971 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
972 }
973 } else if (mode == TEXT) {
974 size_t pathlen;
975
976 fd = open(filename, O_RDONLY);
977 if (fd < 0) {
978 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
979 strerror(errno));
980 return 1;
981 }
982 if (fstat(fd, &st) < 0) {
983 perror("agedu: fstat");
984 return 1;
985 }
986 totalsize = st.st_size;
987 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
988 if (!mappedfile) {
989 perror("agedu: mmap");
990 return 1;
991 }
992 pathsep = trie_pathsep(mappedfile);
993
994 /*
995 * Trim trailing slash, just in case.
996 */
997 pathlen = strlen(querydir);
998 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
999 querydir[--pathlen] = '\0';
1000
1001 text_query(mappedfile, querydir, textcutoff, 1);
1002 } else if (mode == HTML) {
1003 size_t pathlen;
1004 struct html_config cfg;
1005 unsigned long xi;
1006 char *html;
1007
1008 fd = open(filename, O_RDONLY);
1009 if (fd < 0) {
1010 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1011 strerror(errno));
1012 return 1;
1013 }
1014 if (fstat(fd, &st) < 0) {
1015 perror("agedu: fstat");
1016 return 1;
1017 }
1018 totalsize = st.st_size;
1019 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1020 if (!mappedfile) {
1021 perror("agedu: mmap");
1022 return 1;
1023 }
1024 pathsep = trie_pathsep(mappedfile);
1025
1026 /*
1027 * Trim trailing slash, just in case.
1028 */
1029 pathlen = strlen(querydir);
1030 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1031 querydir[--pathlen] = '\0';
1032
1033 xi = trie_before(mappedfile, querydir);
1034 cfg.format = NULL;
1035 cfg.autoage = htmlautoagerange;
1036 cfg.oldest = htmloldest;
1037 cfg.newest = htmlnewest;
1038 html = html_query(mappedfile, xi, &cfg);
1039 fputs(html, stdout);
1040 } else if (mode == DUMP) {
1041 size_t maxpathlen;
1042 char *buf;
1043
1044 fd = open(filename, O_RDONLY);
1045 if (fd < 0) {
1046 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1047 strerror(errno));
1048 return 1;
1049 }
1050 if (fstat(fd, &st) < 0) {
1051 perror("agedu: fstat");
1052 return 1;
1053 }
1054 totalsize = st.st_size;
1055 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1056 if (!mappedfile) {
1057 perror("agedu: mmap");
1058 return 1;
1059 }
1060 pathsep = trie_pathsep(mappedfile);
1061
1062 maxpathlen = trie_maxpathlen(mappedfile);
1063 buf = snewn(maxpathlen, char);
1064
1065 printf("agedu dump file. pathsep=%02x\n", (unsigned char)pathsep);
1066 tw = triewalk_new(mappedfile);
1067 while ((tf = triewalk_next(tw, buf)) != NULL)
1068 dump_line(buf, tf);
1069 triewalk_free(tw);
1070 } else if (mode == HTTPD) {
1071 struct html_config pcfg;
1072 struct httpd_config dcfg;
1073
1074 fd = open(filename, O_RDONLY);
1075 if (fd < 0) {
1076 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1077 strerror(errno));
1078 return 1;
1079 }
1080 if (fstat(fd, &st) < 0) {
1081 perror("agedu: fstat");
1082 return 1;
1083 }
1084 totalsize = st.st_size;
1085 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1086 if (!mappedfile) {
1087 perror("agedu: mmap");
1088 return 1;
1089 }
1090 pathsep = trie_pathsep(mappedfile);
1091
1092 dcfg.address = httpserveraddr;
1093 dcfg.port = httpserverport;
1094 dcfg.basicauthdata = httpauthdata;
1095 pcfg.format = NULL;
1096 pcfg.autoage = htmlautoagerange;
1097 pcfg.oldest = htmloldest;
1098 pcfg.newest = htmlnewest;
1099 run_httpd(mappedfile, auth, &dcfg, &pcfg);
1100 }
1101
1102 return 0;
1103 }