1a68eb48eb697c3e2cb848504ec71e8973481923
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #include "agedu.h"
6
7 #include "du.h"
8 #include "trie.h"
9 #include "index.h"
10 #include "alloc.h"
11 #include "html.h"
12 #include "httpd.h"
13 #include "fgetline.h"
14
15 /*
16 * Path separator. This global variable affects the behaviour of
17 * various parts of the code when they need to deal with path
18 * separators. The path separator appropriate to a particular data
19 * set is encoded in the index file storing that data set; data
20 * sets generated on Unix will of course have the default '/', but
21 * foreign data sets are conceivable and must be handled correctly.
22 */
23 char pathsep = '/';
24
25 void fatal(const char *fmt, ...)
26 {
27 va_list ap;
28 fprintf(stderr, "%s: ", PNAME);
29 va_start(ap, fmt);
30 vfprintf(stderr, fmt, ap);
31 va_end(ap);
32 fprintf(stderr, "\n");
33 exit(1);
34 }
35
/*
 * One wildcard filtering rule collected from the command line.
 * Rules are tried in order during a scan and the last one that
 * matches decides the fate of an entry.
 */
struct inclusion_exclusion {
    /* Verdict if the rule matches: 1 include, 0 exclude, -1 prune. */
    int type;

    /* Pattern handed to fnmatch(). */
    const char *wildcard;

    /*
     * Nonzero: match against the full pathname. Zero: match only
     * against the final path component.
     */
    int path;
};
41
42 struct ctx {
43 triebuild *tb;
44 dev_t datafile_dev, filesystem_dev;
45 ino_t datafile_ino;
46 time_t last_output_update;
47 int progress, progwidth;
48 int straight_to_dump;
49 struct inclusion_exclusion *inex;
50 int ninex;
51 int crossfs;
52 int fakeatimes;
53 };
54
55 static void dump_line(const char *pathname, const struct trie_file *tf)
56 {
57 const char *p;
58 printf("%llu %llu ", tf->size, tf->atime);
59 for (p = pathname; *p; p++) {
60 if (*p >= ' ' && *p < 127 && *p != '%')
61 putchar(*p);
62 else
63 printf("%%%02x", (unsigned char)*p);
64 }
65 putchar('\n');
66 }
67
68 static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
69 {
70 struct ctx *ctx = (struct ctx *)vctx;
71 struct trie_file file;
72 time_t t;
73 int i, include;
74 const char *filename;
75
76 /*
77 * Filter out our own data file.
78 */
79 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
80 return 0;
81
82 /*
83 * Don't cross the streams^W^Wany file system boundary.
84 */
85 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
86 return 0;
87
88 file.size = (unsigned long long)512 * st->st_blocks;
89 if (ctx->fakeatimes && S_ISDIR(st->st_mode))
90 file.atime = st->st_mtime;
91 else
92 file.atime = st->st_atime;
93
94 /*
95 * Filter based on wildcards.
96 */
97 include = 1;
98 filename = strrchr(pathname, pathsep);
99 if (!filename)
100 filename = pathname;
101 else
102 filename++;
103 for (i = 0; i < ctx->ninex; i++) {
104 if (fnmatch(ctx->inex[i].wildcard,
105 ctx->inex[i].path ? pathname : filename, 0) == 0)
106 include = ctx->inex[i].type;
107 }
108 if (include == -1)
109 return 0; /* ignore this entry and any subdirs */
110 if (include == 0) {
111 /*
112 * Here we are supposed to be filtering an entry out, but
113 * still recursing into it if it's a directory. However,
114 * we can't actually leave out any directory whose
115 * subdirectories we then look at. So we cheat, in that
116 * case, by setting the size to zero.
117 */
118 if (!S_ISDIR(st->st_mode))
119 return 0; /* just ignore */
120 else
121 file.size = 0;
122 }
123
124 if (ctx->straight_to_dump)
125 dump_line(pathname, &file);
126 else
127 triebuild_add(ctx->tb, pathname, &file);
128
129 if (ctx->progress) {
130 t = time(NULL);
131 if (t != ctx->last_output_update) {
132 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
133 pathname);
134 fflush(stderr);
135 ctx->last_output_update = t;
136 }
137 }
138
139 return 1;
140 }
141
142 static void text_query(const void *mappedfile, const char *querydir,
143 time_t t, int depth)
144 {
145 size_t maxpathlen;
146 char *pathbuf;
147 unsigned long xi1, xi2;
148 unsigned long long s1, s2;
149
150 maxpathlen = trie_maxpathlen(mappedfile);
151 pathbuf = snewn(maxpathlen + 1, char);
152
153 /*
154 * We want to query everything between the supplied filename
155 * (inclusive) and that filename with a ^A on the end
156 * (exclusive). So find the x indices for each.
157 */
158 strcpy(pathbuf, querydir);
159 make_successor(pathbuf);
160 xi1 = trie_before(mappedfile, querydir);
161 xi2 = trie_before(mappedfile, pathbuf);
162
163 if (xi2 - xi1 == 1)
164 return; /* file, or empty dir => no display */
165
166 /*
167 * Now do the lookups in the age index.
168 */
169 s1 = index_query(mappedfile, xi1, t);
170 s2 = index_query(mappedfile, xi2, t);
171
172 if (s1 == s2)
173 return; /* no space taken up => no display */
174
175 if (depth > 0) {
176 /*
177 * Now scan for first-level subdirectories and report
178 * those too.
179 */
180 xi1++;
181 while (xi1 < xi2) {
182 trie_getpath(mappedfile, xi1, pathbuf);
183 text_query(mappedfile, pathbuf, t, depth-1);
184 make_successor(pathbuf);
185 xi1 = trie_before(mappedfile, pathbuf);
186 }
187 }
188
189 /* Display in units of 1Kb */
190 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
191 }
192
193 /*
194 * Largely frivolous way to define all my command-line options. I
195 * present here a parametric macro which declares a series of
196 * _logical_ option identifiers, and for each one declares zero or
197 * more short option characters and zero or more long option
198 * words. Then I repeatedly invoke that macro with its arguments
199 * defined to be various other macros, which allows me to
200 * variously:
201 *
202 * - define an enum allocating a distinct integer value to each
203 * logical option id
204 * - define a string consisting of precisely all the short option
205 * characters
206 * - define a string array consisting of all the long option
207 * strings
208 * - define (with help from auxiliary enums) integer arrays
209 * parallel to both of the above giving the logical option id
210 * for each physical short and long option
211 * - define an array indexed by logical option id indicating
212 * whether the option in question takes a value
213 * - define a function which prints out brief online help for all
214 * the options.
215 *
216 * It's not at all clear to me that this trickery is actually
217 * particularly _efficient_ - it still, after all, requires going
218 * linearly through the option list at run time and doing a
219 * strcmp, whereas in an ideal world I'd have liked the lists of
220 * long and short options to be pre-sorted so that a binary search
221 * or some other more efficient lookup was possible. (Not that
222 * asymptotic algorithmic complexity is remotely vital in option
223 * parsing, but if I were doing this in, say, Lisp or something
224 * with an equivalently powerful preprocessor then once I'd had
225 * the idea of preparing the option-parsing data structures at
226 * compile time I would probably have made the effort to prepare
227 * them _properly_. I could have Perl generate me a source file
228 * from some sort of description, I suppose, but that would seem
229 * like overkill. And in any case, it's more of a challenge to
230 * achieve as much as possible by cunning use of cpp and enum than
231 * to just write some sensible and logical code in a Turing-
232 * complete language. I said it was largely frivolous :-)
233 *
234 * This approach does have the virtue that it brings together the
235 * option ids, option spellings and help text into a single
236 * combined list and defines them all in exactly one place. If I
237 * want to add a new option, or a new spelling for an option, I
238 * only have to modify the main OPTHELP macro below and then add
239 * code to process the new logical id.
240 *
241 * (Though, really, even that isn't ideal, since it still involves
242 * modifying the source file in more than one place. In a
243 * _properly_ ideal world, I'd be able to interleave the option
244 * definitions with the code fragments that process them. And then
245 * not bother defining logical identifiers for them at all - those
246 * would be automatically generated, since I wouldn't have any
247 * need to specify them manually in another part of the code.)
248 *
249 * One other helpful consequence of the enum-based structure here
250 * is that it causes a compiler error if I accidentally try to
251 * define the same option (short or long) twice.
252 */
253
/*
 * Master option list. Each logical option is introduced by
 * NOVAL(id) or VAL(id) (according to whether it takes a value),
 * followed by its SHORT() and LONG() spellings and its help text.
 * Underscores in LONG() names may be typed by the user as '_' or
 * '-', or left out.
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
    NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \
        HELPOPT("remove the index file") \
    NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \
    NOVAL(LOAD) SHORT(L) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[most modes] specify index file") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
        HELPOPT("[--scan,--load] keep real atimes on directories") \
    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
        HELPOPT("[--scan,--load] fake atimes on directories") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    VAL(TQDEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") HELPOPT("[--text] include only files older than this") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text")

/*
 * Building blocks passed to OPTHELP to extract the various tables.
 *
 * The xxxNEWOPT / xxxTHISOPT pairs are used together inside one
 * enum: NEWOPT pins the running enum value to the logical option
 * id, and THISOPT then declares a scratch enumerator (id + 1)
 * followed by xxxVAL_<spelling>, which is set back to the id so
 * that several spellings in a row all map to the same option.
 *
 * SHORTTMP and LONGTMP merely count spellings. They use distinct
 * name prefixes, because all enumerators live in one namespace
 * and a long option spelled like a short option character would
 * otherwise be a redefinition.
 */
#define IGNORE(x)
#define DEFENUM(x) OPT_ ## x,
#define ZERO(x) 0,
#define ONE(x) 1,
#define STRING(x) #x ,
#define STRINGNOCOMMA(x) #x
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
#define SHORTTMP(x) SHORTtmp3_ ## x,
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
#define LONGTMP(x) LONGtmp3_ ## x,

/* OPTHELP with all the help-text hooks switched off. */
#define OPTIONS(NOVAL, VAL, SHORT, LONG) \
    OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)

/* Logical option ids OPT_xxx, and their count. */
enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
/* Number of short option characters. */
enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
/* Number of long option words. */
enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
/* Indexed by logical id: 1 if the option takes a value. */
static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
/* All short option characters, in declaration order. */
static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
/* All long option words, in declaration order. */
static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
/* SHORTVAL_<c> and LONGVAL_<word>: logical id for each spelling. */
enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
/* Parallel to shortopts[] and longopts[]: the logical id of each entry. */
static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
352
353 static void usage(FILE *fp)
354 {
355 char longbuf[80];
356 const char *prefix, *shortopt, *longopt, *optarg;
357 int i, optex;
358
359 #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
360 #define HELPNOVAL(s) optex = 0;
361 #define HELPVAL(s) optex = 1;
362 #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
363 #define HELPLONG(s) if (!longopt) { \
364 strcpy(longbuf, "--" #s); longopt = longbuf; \
365 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
366 #define HELPPFX(s) prefix = s;
367 #define HELPARG(s) optarg = s;
368 #define HELPLINE(s) assert(optex == -1); \
369 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
370 HELPRESET;
371 #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
372 assert(shortopt || longopt); \
373 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
374 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
375 optarg?" ":"", optarg?optarg:""); \
376 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
377
378 HELPRESET;
379 OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
380 HELPPFX, HELPARG, HELPLINE, HELPOPT);
381
382 #undef HELPRESET
383 #undef HELPNOVAL
384 #undef HELPVAL
385 #undef HELPSHORT
386 #undef HELPLONG
387 #undef HELPPFX
388 #undef HELPARG
389 #undef HELPLINE
390 #undef HELPOPT
391 }
392
/*
 * Convert an age specification into an absolute time.
 *
 * agestr is a decimal number followed by a unit letter: d (days),
 * w (weeks), m (months) or y (years), in either case. The return
 * value is `now' moved that far into the past. Days and weeks are
 * fixed numbers of seconds; months and years are applied to the
 * local calendar date.
 *
 * A malformed specification is reported on stderr and the program
 * exits with status 1.
 */
static time_t parse_age(time_t now, const char *agestr)
{
    time_t t = now;
    int nunits;
    char unit[2];

    if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
        fprintf(stderr, "%s: age specification should be a number followed by"
                " one of d,w,m,y\n", PNAME);
        exit(1);
    }

    /*
     * The scanf pattern accepts both cases of each unit letter,
     * so both cases have to be recognised here as well.
     */
    switch (unit[0]) {
      case 'd': case 'D':
        t -= (time_t)86400 * nunits;
        break;
      case 'w': case 'W':
        t -= (time_t)86400 * 7 * nunits;
        break;
      default:
        {
            const struct tm *lt = localtime(&t);
            struct tm tm;
            int ym;

            if (!lt) {
                fprintf(stderr, "%s: unable to convert current time to"
                        " a calendar date\n", PNAME);
                exit(1);
            }
            tm = *lt;

            /* Work in whole months since the start of the tm epoch. */
            ym = tm.tm_year * 12 + tm.tm_mon;

            if (unit[0] == 'm' || unit[0] == 'M')
                ym -= nunits;
            else
                ym -= 12 * nunits;

            tm.tm_year = ym / 12;
            tm.tm_mon = ym % 12;

            /*
             * The target date may lie on the other side of a DST
             * change from `now', so let mktime work out whether
             * DST applies rather than reusing today's flag.
             */
            tm.tm_isdst = -1;

            t = mktime(&tm);
        }
        break;
    }

    return t;
}
431
432 int main(int argc, char **argv)
433 {
434 int fd, count;
435 struct ctx actx, *ctx = &actx;
436 struct stat st;
437 off_t totalsize, realsize;
438 void *mappedfile;
439 triewalk *tw;
440 indexbuild *ib;
441 const struct trie_file *tf;
442 char *filename = PNAME ".dat";
443 int doing_opts = 1;
444 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE };
445 struct action {
446 int mode;
447 char *arg;
448 } *actions = NULL;
449 int nactions = 0, actionsize = 0, action;
450 time_t now = time(NULL);
451 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
452 int htmlautoagerange = 1;
453 const char *httpserveraddr = NULL;
454 int httpserverport = 0;
455 const char *httpauthdata = NULL;
456 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
457 int progress = 1;
458 struct inclusion_exclusion *inex = NULL;
459 int ninex = 0, inexsize = 0;
460 int crossfs = 0;
461 int tqdepth = 1;
462 int fakediratimes = 1;
463
464 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
465 {
466 static const char *const optnames[NOPTIONS] = {
467 OPTIONS(STRING,STRING,IGNORE,IGNORE)
468 };
469 int i;
470 for (i = 0; i < NSHORTOPTS; i++)
471 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
472 opthasval[shortvals[i]] ? "value" : "no value");
473 for (i = 0; i < NLONGOPTS; i++)
474 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
475 opthasval[longvals[i]] ? "value" : "no value");
476 }
477 #endif
478
479 while (--argc > 0) {
480 char *p = *++argv;
481
482 if (doing_opts && *p == '-') {
483 int wordstart = 1;
484
485 if (!strcmp(p, "--")) {
486 doing_opts = 0;
487 continue;
488 }
489
490 p++;
491 while (*p) {
492 int optid = -1;
493 int i;
494 char *optval;
495
496 if (wordstart && *p == '-') {
497 /*
498 * GNU-style long option.
499 */
500 p++;
501 optval = strchr(p, '=');
502 if (optval)
503 *optval++ = '\0';
504
505 for (i = 0; i < NLONGOPTS; i++) {
506 const char *opt = longopts[i], *s = p;
507 int match = 1;
508 /*
509 * The underscores in the option names
510 * defined above may be given by the user
511 * as underscores or dashes, or omitted
512 * entirely.
513 */
514 while (*opt) {
515 if (*opt == '_') {
516 if (*s == '-' || *s == '_')
517 s++;
518 } else {
519 if (*opt != *s) {
520 match = 0;
521 break;
522 }
523 s++;
524 }
525 opt++;
526 }
527 if (match && !*s) {
528 optid = longvals[i];
529 break;
530 }
531 }
532
533 if (optid < 0) {
534 fprintf(stderr, "%s: unrecognised option '--%s'\n",
535 PNAME, p);
536 return 1;
537 }
538
539 if (!opthasval[optid]) {
540 if (optval) {
541 fprintf(stderr, "%s: unexpected argument to option"
542 " '--%s'\n", PNAME, p);
543 return 1;
544 }
545 } else {
546 if (!optval) {
547 if (--argc > 0) {
548 optval = *++argv;
549 } else {
550 fprintf(stderr, "%s: option '--%s' expects"
551 " an argument\n", PNAME, p);
552 return 1;
553 }
554 }
555 }
556
557 p += strlen(p); /* finished with this argument word */
558 } else {
559 /*
560 * Short option.
561 */
562 char c = *p++;
563
564 for (i = 0; i < NSHORTOPTS; i++)
565 if (c == shortopts[i]) {
566 optid = shortvals[i];
567 break;
568 }
569
570 if (optid < 0) {
571 fprintf(stderr, "%s: unrecognised option '-%c'\n",
572 PNAME, c);
573 return 1;
574 }
575
576 if (opthasval[optid]) {
577 if (*p) {
578 optval = p;
579 p += strlen(p);
580 } else if (--argc > 0) {
581 optval = *++argv;
582 } else {
583 fprintf(stderr, "%s: option '-%c' expects"
584 " an argument\n", PNAME, c);
585 return 1;
586 }
587 } else {
588 optval = NULL;
589 }
590 }
591
592 wordstart = 0;
593
594 /*
595 * Now actually process the option.
596 */
597 switch (optid) {
598 case OPT_HELP:
599 usage(stdout);
600 return 0;
601 case OPT_VERSION:
602 #ifdef PACKAGE_VERSION
603 printf("%s, revision %s\n", PNAME, PACKAGE_VERSION);
604 #else
605 printf("%s: version number not available when not built"
606 " via automake\n", PNAME);
607 #endif
608 return 0;
609 case OPT_LICENCE:
610 {
611 extern const char *const licence[];
612 int i;
613
614 for (i = 0; licence[i]; i++)
615 fputs(licence[i], stdout);
616
617 return 0;
618 }
619 return 0;
620 case OPT_SCAN:
621 if (nactions >= actionsize) {
622 actionsize = nactions * 3 / 2 + 16;
623 actions = sresize(actions, actionsize, struct action);
624 }
625 actions[nactions].mode = SCAN;
626 actions[nactions].arg = optval;
627 nactions++;
628 break;
629 case OPT_SCANDUMP:
630 if (nactions >= actionsize) {
631 actionsize = nactions * 3 / 2 + 16;
632 actions = sresize(actions, actionsize, struct action);
633 }
634 actions[nactions].mode = SCANDUMP;
635 actions[nactions].arg = optval;
636 nactions++;
637 break;
638 case OPT_DUMP:
639 if (nactions >= actionsize) {
640 actionsize = nactions * 3 / 2 + 16;
641 actions = sresize(actions, actionsize, struct action);
642 }
643 actions[nactions].mode = DUMP;
644 actions[nactions].arg = NULL;
645 nactions++;
646 break;
647 case OPT_LOAD:
648 if (nactions >= actionsize) {
649 actionsize = nactions * 3 / 2 + 16;
650 actions = sresize(actions, actionsize, struct action);
651 }
652 actions[nactions].mode = LOAD;
653 actions[nactions].arg = NULL;
654 nactions++;
655 break;
656 case OPT_TEXT:
657 if (nactions >= actionsize) {
658 actionsize = nactions * 3 / 2 + 16;
659 actions = sresize(actions, actionsize, struct action);
660 }
661 actions[nactions].mode = TEXT;
662 actions[nactions].arg = optval;
663 nactions++;
664 break;
665 case OPT_HTML:
666 if (nactions >= actionsize) {
667 actionsize = nactions * 3 / 2 + 16;
668 actions = sresize(actions, actionsize, struct action);
669 }
670 actions[nactions].mode = HTML;
671 actions[nactions].arg = optval;
672 nactions++;
673 break;
674 case OPT_HTTPD:
675 if (nactions >= actionsize) {
676 actionsize = nactions * 3 / 2 + 16;
677 actions = sresize(actions, actionsize, struct action);
678 }
679 actions[nactions].mode = HTTPD;
680 actions[nactions].arg = NULL;
681 nactions++;
682 break;
683 case OPT_REMOVE:
684 if (nactions >= actionsize) {
685 actionsize = nactions * 3 / 2 + 16;
686 actions = sresize(actions, actionsize, struct action);
687 }
688 actions[nactions].mode = REMOVE;
689 actions[nactions].arg = NULL;
690 nactions++;
691 break;
692 case OPT_PROGRESS:
693 progress = 2;
694 break;
695 case OPT_NOPROGRESS:
696 progress = 0;
697 break;
698 case OPT_TTYPROGRESS:
699 progress = 1;
700 break;
701 case OPT_CROSSFS:
702 crossfs = 1;
703 break;
704 case OPT_NOCROSSFS:
705 crossfs = 0;
706 break;
707 case OPT_DIRATIME:
708 fakediratimes = 0;
709 break;
710 case OPT_NODIRATIME:
711 fakediratimes = 1;
712 break;
713 case OPT_DATAFILE:
714 filename = optval;
715 break;
716 case OPT_TQDEPTH:
717 tqdepth = atoi(optval);
718 break;
719 case OPT_MINAGE:
720 textcutoff = parse_age(now, optval);
721 break;
722 case OPT_AGERANGE:
723 if (!strcmp(optval, "auto")) {
724 htmlautoagerange = 1;
725 } else {
726 char *q = optval + strcspn(optval, "-:");
727 if (*q)
728 *q++ = '\0';
729 htmloldest = parse_age(now, optval);
730 htmlnewest = *q ? parse_age(now, q) : now;
731 htmlautoagerange = 0;
732 }
733 break;
734 case OPT_SERVERADDR:
735 {
736 char *port;
737 if (optval[0] == '[' &&
738 (port = strchr(optval, ']')) != NULL)
739 port++;
740 else
741 port = optval;
742 port += strcspn(port, ":");
743 if (port)
744 *port++ = '\0';
745 httpserveraddr = optval;
746 httpserverport = atoi(port);
747 }
748 break;
749 case OPT_AUTH:
750 if (!strcmp(optval, "magic"))
751 auth = HTTPD_AUTH_MAGIC;
752 else if (!strcmp(optval, "basic"))
753 auth = HTTPD_AUTH_BASIC;
754 else if (!strcmp(optval, "none"))
755 auth = HTTPD_AUTH_NONE;
756 else if (!strcmp(optval, "default"))
757 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
758 else if (!strcmp(optval, "help") ||
759 !strcmp(optval, "list")) {
760 printf(PNAME ": supported HTTP authentication types"
761 " are:\n"
762 " magic use Linux /proc/net/tcp to"
763 " determine owner of peer socket\n"
764 " basic HTTP Basic username and"
765 " password authentication\n"
766 " default use 'magic' if possible, "
767 " otherwise fall back to 'basic'\n"
768 " none unauthenticated HTTP (if"
769 " the data file is non-confidential)\n");
770 return 0;
771 } else {
772 fprintf(stderr, "%s: unrecognised authentication"
773 " type '%s'\n%*s options are 'magic',"
774 " 'basic', 'none', 'default'\n",
775 PNAME, optval, (int)strlen(PNAME), "");
776 return 1;
777 }
778 break;
779 case OPT_AUTHFILE:
780 case OPT_AUTHFD:
781 {
782 int fd;
783 char namebuf[40];
784 const char *name;
785 char *authbuf;
786 int authlen, authsize;
787 int ret;
788
789 if (optid == OPT_AUTHFILE) {
790 fd = open(optval, O_RDONLY);
791 if (fd < 0) {
792 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
793 optval, strerror(errno));
794 return 1;
795 }
796 name = optval;
797 } else {
798 fd = atoi(optval);
799 name = namebuf;
800 sprintf(namebuf, "fd %d", fd);
801 }
802
803 authlen = 0;
804 authsize = 256;
805 authbuf = snewn(authsize, char);
806 while ((ret = read(fd, authbuf+authlen,
807 authsize-authlen)) > 0) {
808 authlen += ret;
809 if ((authsize - authlen) < (authsize / 16)) {
810 authsize = authlen * 3 / 2 + 4096;
811 authbuf = sresize(authbuf, authsize, char);
812 }
813 }
814 if (ret < 0) {
815 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
816 name, strerror(errno));
817 return 1;
818 }
819 if (optid == OPT_AUTHFILE)
820 close(fd);
821 httpauthdata = authbuf;
822 }
823 break;
824 case OPT_INCLUDE:
825 case OPT_INCLUDEPATH:
826 case OPT_EXCLUDE:
827 case OPT_EXCLUDEPATH:
828 case OPT_PRUNE:
829 case OPT_PRUNEPATH:
830 if (ninex >= inexsize) {
831 inexsize = ninex * 3 / 2 + 16;
832 inex = sresize(inex, inexsize,
833 struct inclusion_exclusion);
834 }
835 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
836 optid == OPT_EXCLUDEPATH ||
837 optid == OPT_PRUNEPATH);
838 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
839 optid == OPT_INCLUDEPATH ? 1 :
840 optid == OPT_EXCLUDE ? 0 :
841 optid == OPT_EXCLUDEPATH ? 0 :
842 optid == OPT_PRUNE ? -1 :
843 /* optid == OPT_PRUNEPATH ? */ -1);
844 inex[ninex].wildcard = optval;
845 ninex++;
846 break;
847 }
848 }
849 } else {
850 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
851 return 1;
852 }
853 }
854
855 if (nactions == 0) {
856 usage(stderr);
857 return 1;
858 }
859
860 for (action = 0; action < nactions; action++) {
861 int mode = actions[action].mode;
862
863 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
864 const char *scandir = actions[action].arg;
865 if (mode == LOAD) {
866 char *buf = fgetline(stdin);
867 unsigned newpathsep;
868 buf[strcspn(buf, "\r\n")] = '\0';
869 if (1 != sscanf(buf, DUMPHDR "%x",
870 &newpathsep)) {
871 fprintf(stderr, "%s: header in dump file not recognised\n",
872 PNAME);
873 return 1;
874 }
875 pathsep = (char)newpathsep;
876 sfree(buf);
877 }
878
879 if (mode == SCAN || mode == LOAD) {
880 /*
881 * Prepare to write out the index file.
882 */
883 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
884 if (fd < 0) {
885 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
886 strerror(errno));
887 return 1;
888 }
889 if (fstat(fd, &st) < 0) {
890 perror(PNAME ": fstat");
891 return 1;
892 }
893 ctx->datafile_dev = st.st_dev;
894 ctx->datafile_ino = st.st_ino;
895 ctx->straight_to_dump = 0;
896 } else {
897 ctx->datafile_dev = -1;
898 ctx->datafile_ino = -1;
899 ctx->straight_to_dump = 1;
900 }
901
902 if (mode == SCAN || mode == SCANDUMP) {
903 if (stat(scandir, &st) < 0) {
904 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
905 strerror(errno));
906 return 1;
907 }
908 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
909 }
910
911 ctx->inex = inex;
912 ctx->ninex = ninex;
913 ctx->crossfs = crossfs;
914 ctx->fakeatimes = fakediratimes;
915
916 ctx->last_output_update = time(NULL);
917
918 /* progress==1 means report progress only if stderr is a tty */
919 if (progress == 1)
920 progress = isatty(2) ? 2 : 0;
921 ctx->progress = progress;
922 {
923 struct winsize ws;
924 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
925 ctx->progwidth = ws.ws_col - 1;
926 else
927 ctx->progwidth = 79;
928 }
929
930 if (mode == SCANDUMP)
931 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
932
933 /*
934 * Scan the directory tree, and write out the trie component
935 * of the data file.
936 */
937 if (mode != SCANDUMP) {
938 ctx->tb = triebuild_new(fd);
939 }
940 if (mode == LOAD) {
941 char *buf;
942 int line = 2;
943 while ((buf = fgetline(stdin)) != NULL) {
944 struct trie_file tf;
945 char *p, *q;
946
947 buf[strcspn(buf, "\r\n")] = '\0';
948
949 p = buf;
950 q = p;
951 while (*p && *p != ' ') p++;
952 if (!*p) {
953 fprintf(stderr, "%s: dump file line %d: expected at least"
954 " three fields\n", PNAME, line);
955 return 1;
956 }
957 *p++ = '\0';
958 tf.size = strtoull(q, NULL, 10);
959 q = p;
960 while (*p && *p != ' ') p++;
961 if (!*p) {
962 fprintf(stderr, "%s: dump file line %d: expected at least"
963 " three fields\n", PNAME, line);
964 return 1;
965 }
966 *p++ = '\0';
967 tf.atime = strtoull(q, NULL, 10);
968 q = buf;
969 while (*p) {
970 int c = *p;
971 if (*p == '%') {
972 int i;
973 p++;
974 c = 0;
975 for (i = 0; i < 2; i++) {
976 c *= 16;
977 if (*p >= '0' && *p <= '9')
978 c += *p - '0';
979 else if (*p >= 'A' && *p <= 'F')
980 c += *p - ('A' - 10);
981 else if (*p >= 'a' && *p <= 'f')
982 c += *p - ('a' - 10);
983 else {
984 fprintf(stderr, "%s: dump file line %d: unable"
985 " to parse hex escape\n", PNAME, line);
986 }
987 p++;
988 }
989 }
990 *q++ = c;
991 p++;
992 }
993 *q = '\0';
994 triebuild_add(ctx->tb, buf, &tf);
995 sfree(buf);
996 line++;
997 }
998 } else {
999 du(scandir, gotdata, ctx);
1000 }
1001 if (mode != SCANDUMP) {
1002 count = triebuild_finish(ctx->tb);
1003 triebuild_free(ctx->tb);
1004
1005 if (ctx->progress) {
1006 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
1007 fflush(stderr);
1008 }
1009
1010 /*
1011 * Work out how much space the cumulative index trees
1012 * will take; enlarge the file, and memory-map it.
1013 */
1014 if (fstat(fd, &st) < 0) {
1015 perror(PNAME ": fstat");
1016 return 1;
1017 }
1018
1019 printf("Built pathname index, %d entries, %llu bytes\n", count,
1020 (unsigned long long)st.st_size);
1021
1022 totalsize = index_compute_size(st.st_size, count);
1023
1024 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
1025 perror(PNAME ": lseek");
1026 return 1;
1027 }
1028 if (write(fd, "\0", 1) < 1) {
1029 perror(PNAME ": write");
1030 return 1;
1031 }
1032
1033 printf("Upper bound on index file size = %llu bytes\n",
1034 (unsigned long long)totalsize);
1035
1036 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1037 if (!mappedfile) {
1038 perror(PNAME ": mmap");
1039 return 1;
1040 }
1041
1042 if (fakediratimes) {
1043 printf("Faking directory atimes\n");
1044 trie_fake_dir_atimes(mappedfile);
1045 }
1046
1047 printf("Building index\n");
1048 ib = indexbuild_new(mappedfile, st.st_size, count);
1049 tw = triewalk_new(mappedfile);
1050 while ((tf = triewalk_next(tw, NULL)) != NULL)
1051 indexbuild_add(ib, tf);
1052 triewalk_free(tw);
1053 realsize = indexbuild_realsize(ib);
1054 indexbuild_free(ib);
1055
1056 munmap(mappedfile, totalsize);
1057 ftruncate(fd, realsize);
1058 close(fd);
1059 printf("Actual index file size = %llu bytes\n",
1060 (unsigned long long)realsize);
1061 }
1062 } else if (mode == TEXT) {
1063 char *querydir = actions[action].arg;
1064 size_t pathlen;
1065
1066 fd = open(filename, O_RDONLY);
1067 if (fd < 0) {
1068 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1069 strerror(errno));
1070 return 1;
1071 }
1072 if (fstat(fd, &st) < 0) {
1073 perror(PNAME ": fstat");
1074 return 1;
1075 }
1076 totalsize = st.st_size;
1077 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1078 if (!mappedfile) {
1079 perror(PNAME ": mmap");
1080 return 1;
1081 }
1082 pathsep = trie_pathsep(mappedfile);
1083
1084 /*
1085 * Trim trailing slash, just in case.
1086 */
1087 pathlen = strlen(querydir);
1088 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1089 querydir[--pathlen] = '\0';
1090
1091 text_query(mappedfile, querydir, textcutoff, tqdepth);
1092 } else if (mode == HTML) {
1093 char *querydir = actions[action].arg;
1094 size_t pathlen;
1095 struct html_config cfg;
1096 unsigned long xi;
1097 char *html;
1098
1099 fd = open(filename, O_RDONLY);
1100 if (fd < 0) {
1101 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1102 strerror(errno));
1103 return 1;
1104 }
1105 if (fstat(fd, &st) < 0) {
1106 perror(PNAME ": fstat");
1107 return 1;
1108 }
1109 totalsize = st.st_size;
1110 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1111 if (!mappedfile) {
1112 perror(PNAME ": mmap");
1113 return 1;
1114 }
1115 pathsep = trie_pathsep(mappedfile);
1116
1117 /*
1118 * Trim trailing slash, just in case.
1119 */
1120 pathlen = strlen(querydir);
1121 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1122 querydir[--pathlen] = '\0';
1123
1124 xi = trie_before(mappedfile, querydir);
1125 cfg.format = NULL;
1126 cfg.autoage = htmlautoagerange;
1127 cfg.oldest = htmloldest;
1128 cfg.newest = htmlnewest;
1129 html = html_query(mappedfile, xi, &cfg);
1130 fputs(html, stdout);
1131 } else if (mode == DUMP) {
1132 size_t maxpathlen;
1133 char *buf;
1134
1135 fd = open(filename, O_RDONLY);
1136 if (fd < 0) {
1137 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1138 strerror(errno));
1139 return 1;
1140 }
1141 if (fstat(fd, &st) < 0) {
1142 perror(PNAME ": fstat");
1143 return 1;
1144 }
1145 totalsize = st.st_size;
1146 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1147 if (!mappedfile) {
1148 perror(PNAME ": mmap");
1149 return 1;
1150 }
1151 pathsep = trie_pathsep(mappedfile);
1152
1153 maxpathlen = trie_maxpathlen(mappedfile);
1154 buf = snewn(maxpathlen, char);
1155
1156 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
1157 tw = triewalk_new(mappedfile);
1158 while ((tf = triewalk_next(tw, buf)) != NULL)
1159 dump_line(buf, tf);
1160 triewalk_free(tw);
1161 } else if (mode == HTTPD) {
1162 struct html_config pcfg;
1163 struct httpd_config dcfg;
1164
1165 fd = open(filename, O_RDONLY);
1166 if (fd < 0) {
1167 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1168 strerror(errno));
1169 return 1;
1170 }
1171 if (fstat(fd, &st) < 0) {
1172 perror(PNAME ": fstat");
1173 return 1;
1174 }
1175 totalsize = st.st_size;
1176 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1177 if (!mappedfile) {
1178 perror(PNAME ": mmap");
1179 return 1;
1180 }
1181 pathsep = trie_pathsep(mappedfile);
1182
1183 dcfg.address = httpserveraddr;
1184 dcfg.port = httpserverport;
1185 dcfg.basicauthdata = httpauthdata;
1186 pcfg.format = NULL;
1187 pcfg.autoage = htmlautoagerange;
1188 pcfg.oldest = htmloldest;
1189 pcfg.newest = htmlnewest;
1190 run_httpd(mappedfile, auth, &dcfg, &pcfg);
1191 } else if (mode == REMOVE) {
1192 if (remove(filename) < 0) {
1193 fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename,
1194 strerror(errno));
1195 return 1;
1196 }
1197 }
1198 }
1199
1200 return 0;
1201 }