Fix bonehead error in hex escape processing when loading dump files.
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #define _GNU_SOURCE
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <time.h>
13 #include <assert.h>
14
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <fcntl.h>
18 #include <sys/mman.h>
19 #include <termios.h>
20 #include <sys/ioctl.h>
21 #include <fnmatch.h>
22
23 #include "agedu.h"
24 #include "du.h"
25 #include "trie.h"
26 #include "index.h"
27 #include "malloc.h"
28 #include "html.h"
29 #include "httpd.h"
30 #include "fgetline.h"
31
/*
 * The character that separates the components of a pathname. Code
 * that needs to split or join paths consults this global instead of
 * assuming '/'. Every index file records the separator belonging to
 * its own data set: anything scanned on Unix uses the default given
 * here, but a data set that originated elsewhere may use a different
 * character, and must still be handled correctly.
 */
char pathsep = '/';
41
42 void fatal(const char *fmt, ...)
43 {
44 va_list ap;
45 fprintf(stderr, "%s: ", PNAME);
46 va_start(ap, fmt);
47 vfprintf(stderr, fmt, ap);
48 va_end(ap);
49 fprintf(stderr, "\n");
50 exit(1);
51 }
52
/*
 * One include/exclude/prune rule taken from the command line. The
 * rules are tried in order during a scan and the last one that
 * matches an entry decides its fate.
 */
struct inclusion_exclusion {
    int type;              /* 1 = include, 0 = exclude, -1 = prune */
    const char *wildcard;  /* fnmatch() pattern to test against */
    int path;              /* nonzero: match against the whole pathname;
                            * zero: match against the last component only */
};
58
59 struct ctx {
60 triebuild *tb;
61 dev_t datafile_dev, filesystem_dev;
62 ino_t datafile_ino;
63 time_t last_output_update;
64 int progress, progwidth;
65 int straight_to_dump;
66 struct inclusion_exclusion *inex;
67 int ninex;
68 int crossfs;
69 int fakeatimes;
70 };
71
72 static void dump_line(const char *pathname, const struct trie_file *tf)
73 {
74 const char *p;
75 printf("%llu %llu ", tf->size, tf->atime);
76 for (p = pathname; *p; p++) {
77 if (*p >= ' ' && *p < 127 && *p != '%')
78 putchar(*p);
79 else
80 printf("%%%02x", (unsigned char)*p);
81 }
82 putchar('\n');
83 }
84
85 static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
86 {
87 struct ctx *ctx = (struct ctx *)vctx;
88 struct trie_file file;
89 time_t t;
90 int i, include;
91 const char *filename;
92
93 /*
94 * Filter out our own data file.
95 */
96 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
97 return 0;
98
99 /*
100 * Don't cross the streams^W^Wany file system boundary.
101 */
102 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
103 return 0;
104
105 file.size = (unsigned long long)512 * st->st_blocks;
106 if (ctx->fakeatimes && S_ISDIR(st->st_mode))
107 file.atime = st->st_mtime;
108 else
109 file.atime = st->st_atime;
110
111 /*
112 * Filter based on wildcards.
113 */
114 include = 1;
115 filename = strrchr(pathname, pathsep);
116 if (!filename)
117 filename = pathname;
118 else
119 filename++;
120 for (i = 0; i < ctx->ninex; i++) {
121 if (fnmatch(ctx->inex[i].wildcard,
122 ctx->inex[i].path ? pathname : filename, 0) == 0)
123 include = ctx->inex[i].type;
124 }
125 if (include == -1)
126 return 0; /* ignore this entry and any subdirs */
127 if (include == 0) {
128 /*
129 * Here we are supposed to be filtering an entry out, but
130 * still recursing into it if it's a directory. However,
131 * we can't actually leave out any directory whose
132 * subdirectories we then look at. So we cheat, in that
133 * case, by setting the size to zero.
134 */
135 if (!S_ISDIR(st->st_mode))
136 return 0; /* just ignore */
137 else
138 file.size = 0;
139 }
140
141 if (ctx->straight_to_dump)
142 dump_line(pathname, &file);
143 else
144 triebuild_add(ctx->tb, pathname, &file);
145
146 if (ctx->progress) {
147 t = time(NULL);
148 if (t != ctx->last_output_update) {
149 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
150 pathname);
151 fflush(stderr);
152 ctx->last_output_update = t;
153 }
154 }
155
156 return 1;
157 }
158
159 static void text_query(const void *mappedfile, const char *querydir,
160 time_t t, int depth)
161 {
162 size_t maxpathlen;
163 char *pathbuf;
164 unsigned long xi1, xi2;
165 unsigned long long s1, s2;
166
167 maxpathlen = trie_maxpathlen(mappedfile);
168 pathbuf = snewn(maxpathlen + 1, char);
169
170 /*
171 * We want to query everything between the supplied filename
172 * (inclusive) and that filename with a ^A on the end
173 * (exclusive). So find the x indices for each.
174 */
175 strcpy(pathbuf, querydir);
176 make_successor(pathbuf);
177 xi1 = trie_before(mappedfile, querydir);
178 xi2 = trie_before(mappedfile, pathbuf);
179
180 if (xi2 - xi1 == 1)
181 return; /* file, or empty dir => no display */
182
183 /*
184 * Now do the lookups in the age index.
185 */
186 s1 = index_query(mappedfile, xi1, t);
187 s2 = index_query(mappedfile, xi2, t);
188
189 if (s1 == s2)
190 return; /* no space taken up => no display */
191
192 if (depth > 0) {
193 /*
194 * Now scan for first-level subdirectories and report
195 * those too.
196 */
197 xi1++;
198 while (xi1 < xi2) {
199 trie_getpath(mappedfile, xi1, pathbuf);
200 text_query(mappedfile, pathbuf, t, depth-1);
201 make_successor(pathbuf);
202 xi1 = trie_before(mappedfile, pathbuf);
203 }
204 }
205
206 /* Display in units of 1Kb */
207 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
208 }
209
210 /*
211 * Largely frivolous way to define all my command-line options. I
212 * present here a parametric macro which declares a series of
213 * _logical_ option identifiers, and for each one declares zero or
214 * more short option characters and zero or more long option
215 * words. Then I repeatedly invoke that macro with its arguments
216 * defined to be various other macros, which allows me to
217 * variously:
218 *
219 * - define an enum allocating a distinct integer value to each
220 * logical option id
221 * - define a string consisting of precisely all the short option
222 * characters
223 * - define a string array consisting of all the long option
224 * strings
225 * - define (with help from auxiliary enums) integer arrays
226 * parallel to both of the above giving the logical option id
227 * for each physical short and long option
228 * - define an array indexed by logical option id indicating
229 * whether the option in question takes a value
230 * - define a function which prints out brief online help for all
231 * the options.
232 *
233 * It's not at all clear to me that this trickery is actually
234 * particularly _efficient_ - it still, after all, requires going
235 * linearly through the option list at run time and doing a
236 * strcmp, whereas in an ideal world I'd have liked the lists of
237 * long and short options to be pre-sorted so that a binary search
238 * or some other more efficient lookup was possible. (Not that
239 * asymptotic algorithmic complexity is remotely vital in option
240 * parsing, but if I were doing this in, say, Lisp or something
241 * with an equivalently powerful preprocessor then once I'd had
242 * the idea of preparing the option-parsing data structures at
243 * compile time I would probably have made the effort to prepare
244 * them _properly_. I could have Perl generate me a source file
245 * from some sort of description, I suppose, but that would seem
246 * like overkill. And in any case, it's more of a challenge to
247 * achieve as much as possible by cunning use of cpp and enum than
248 * to just write some sensible and logical code in a Turing-
249 * complete language. I said it was largely frivolous :-)
250 *
251 * This approach does have the virtue that it brings together the
252 * option ids, option spellings and help text into a single
253 * combined list and defines them all in exactly one place. If I
254 * want to add a new option, or a new spelling for an option, I
255 * only have to modify the main OPTHELP macro below and then add
256 * code to process the new logical id.
257 *
258 * (Though, really, even that isn't ideal, since it still involves
259 * modifying the source file in more than one place. In a
260 * _properly_ ideal world, I'd be able to interleave the option
261 * definitions with the code fragments that process them. And then
262 * not bother defining logical identifiers for them at all - those
263 * would be automatically generated, since I wouldn't have any
264 * need to specify them manually in another part of the code.)
265 */
266
/*
 * The master option list. Each logical option is introduced by
 * NOVAL(id) or VAL(id), according to whether it takes a value, and
 * is followed by its SHORT() and LONG() spellings and the HELP*()
 * fragments describing it. Underscores in LONG() names are matched
 * flexibly by the option parser and are shown as dashes in the help
 * text.
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file on stdout") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    NOVAL(LOAD) SHORT(l) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[all modes] specify index file") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
        HELPOPT("[--scan] keep real atimes on directories") \
    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
        HELPOPT("[--scan] fake atimes on directories") \
    VAL(TQDEPTH) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") HELPOPT("[--text] include only files older than this") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text") \

/*
 * Building blocks which are passed to OPTHELP in various
 * combinations to generate the tables below.
 *
 * The *NEWOPT / *THISOPT pairs work by exploiting the way enum
 * values count upwards: each logical option resets the running
 * enum value to its own OPT_ id, and each spelling that follows
 * declares a throwaway enumerator (which bumps the counter) and
 * then a *VAL_ enumerator set back to one less, i.e. to the id of
 * the option the spelling belongs to.
 *
 * SHORTTMP and LONGTMP exist purely to count spellings. They use
 * distinct prefixes so that the short and long counting enums can
 * never produce the same enumerator name.
 */
#define IGNORE(x)
#define DEFENUM(x) OPT_ ## x,
#define ZERO(x) 0,
#define ONE(x) 1,
#define STRING(x) #x ,
#define STRINGNOCOMMA(x) #x
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
#define SHORTTMP(x) SHORTtmp3_ ## x,
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
#define LONGTMP(x) LONGtmp3_ ## x,

/* OPTHELP with all the help-text fragments thrown away. */
#define OPTIONS(NOVAL, VAL, SHORT, LONG) \
    OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)

/* Logical option ids, OPT_xxx, and how many there are. */
enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
/* Number of short option characters. */
enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
/* Number of long option words. */
enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
/* Indexed by logical option id: 1 if the option takes a value. */
static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
/* Every short option character, as one string. */
static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
/* Every long option word. */
static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
/* Auxiliary enums defining SHORTVAL_x and LONGVAL_x for each spelling. */
enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
/* Parallel to shortopts and longopts: the logical id of each spelling. */
static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
363
364 static void usage(FILE *fp)
365 {
366 char longbuf[80];
367 const char *prefix, *shortopt, *longopt, *optarg;
368 int i, optex;
369
370 #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
371 #define HELPNOVAL(s) optex = 0;
372 #define HELPVAL(s) optex = 1;
373 #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
374 #define HELPLONG(s) if (!longopt) { \
375 strcpy(longbuf, "--" #s); longopt = longbuf; \
376 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
377 #define HELPPFX(s) prefix = s;
378 #define HELPARG(s) optarg = s;
379 #define HELPLINE(s) assert(optex == -1); \
380 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
381 HELPRESET;
382 #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
383 assert(shortopt || longopt); \
384 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
385 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
386 optarg?" ":"", optarg?optarg:""); \
387 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
388
389 HELPRESET;
390 OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
391 HELPPFX, HELPARG, HELPLINE, HELPOPT);
392
393 #undef HELPRESET
394 #undef HELPNOVAL
395 #undef HELPVAL
396 #undef HELPSHORT
397 #undef HELPLONG
398 #undef HELPPFX
399 #undef HELPARG
400 #undef HELPLINE
401 #undef HELPOPT
402 }
403
404 static time_t parse_age(time_t now, const char *agestr)
405 {
406 time_t t;
407 struct tm tm;
408 int nunits;
409 char unit[2];
410
411 t = now;
412
413 if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
414 fprintf(stderr, "%s: age specification should be a number followed by"
415 " one of d,w,m,y\n", PNAME);
416 exit(1);
417 }
418
419 if (unit[0] == 'd') {
420 t -= 86400 * nunits;
421 } else if (unit[0] == 'w') {
422 t -= 86400 * 7 * nunits;
423 } else {
424 int ym;
425
426 tm = *localtime(&t);
427 ym = tm.tm_year * 12 + tm.tm_mon;
428
429 if (unit[0] == 'm')
430 ym -= nunits;
431 else
432 ym -= 12 * nunits;
433
434 tm.tm_year = ym / 12;
435 tm.tm_mon = ym % 12;
436
437 t = mktime(&tm);
438 }
439
440 return t;
441 }
442
443 int main(int argc, char **argv)
444 {
445 int fd, count;
446 struct ctx actx, *ctx = &actx;
447 struct stat st;
448 off_t totalsize, realsize;
449 void *mappedfile;
450 triewalk *tw;
451 indexbuild *ib;
452 const struct trie_file *tf;
453 char *filename = PNAME ".dat";
454 int doing_opts = 1;
455 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD };
456 struct action {
457 int mode;
458 char *arg;
459 } *actions = NULL;
460 int nactions = 0, actionsize = 0, action;
461 time_t now = time(NULL);
462 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
463 int htmlautoagerange = 1;
464 const char *httpserveraddr = NULL;
465 int httpserverport = 0;
466 const char *httpauthdata = NULL;
467 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
468 int progress = 1;
469 struct inclusion_exclusion *inex = NULL;
470 int ninex = 0, inexsize = 0;
471 int crossfs = 0;
472 int tqdepth = 1;
473 int fakediratimes = 1;
474
475 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
476 {
477 static const char *const optnames[NOPTIONS] = {
478 OPTIONS(STRING,STRING,IGNORE,IGNORE)
479 };
480 int i;
481 for (i = 0; i < NSHORTOPTS; i++)
482 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
483 opthasval[shortvals[i]] ? "value" : "no value");
484 for (i = 0; i < NLONGOPTS; i++)
485 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
486 opthasval[longvals[i]] ? "value" : "no value");
487 }
488 #endif
489
490 while (--argc > 0) {
491 char *p = *++argv;
492
493 if (doing_opts && *p == '-') {
494 int wordstart = 1;
495
496 if (!strcmp(p, "--")) {
497 doing_opts = 0;
498 continue;
499 }
500
501 p++;
502 while (*p) {
503 int optid = -1;
504 int i;
505 char *optval;
506
507 if (wordstart && *p == '-') {
508 /*
509 * GNU-style long option.
510 */
511 p++;
512 optval = strchr(p, '=');
513 if (optval)
514 *optval++ = '\0';
515
516 for (i = 0; i < NLONGOPTS; i++) {
517 const char *opt = longopts[i], *s = p;
518 int match = 1;
519 /*
520 * The underscores in the option names
521 * defined above may be given by the user
522 * as underscores or dashes, or omitted
523 * entirely.
524 */
525 while (*opt) {
526 if (*opt == '_') {
527 if (*s == '-' || *s == '_')
528 s++;
529 } else {
530 if (*opt != *s) {
531 match = 0;
532 break;
533 }
534 s++;
535 }
536 opt++;
537 }
538 if (match && !*s) {
539 optid = longvals[i];
540 break;
541 }
542 }
543
544 if (optid < 0) {
545 fprintf(stderr, "%s: unrecognised option '--%s'\n",
546 PNAME, p);
547 return 1;
548 }
549
550 if (!opthasval[optid]) {
551 if (optval) {
552 fprintf(stderr, "%s: unexpected argument to option"
553 " '--%s'\n", PNAME, p);
554 return 1;
555 }
556 } else {
557 if (!optval) {
558 if (--argc > 0) {
559 optval = *++argv;
560 } else {
561 fprintf(stderr, "%s: option '--%s' expects"
562 " an argument\n", PNAME, p);
563 return 1;
564 }
565 }
566 }
567
568 p += strlen(p); /* finished with this argument word */
569 } else {
570 /*
571 * Short option.
572 */
573 char c = *p++;
574
575 for (i = 0; i < NSHORTOPTS; i++)
576 if (c == shortopts[i]) {
577 optid = shortvals[i];
578 break;
579 }
580
581 if (optid < 0) {
582 fprintf(stderr, "%s: unrecognised option '-%c'\n",
583 PNAME, c);
584 return 1;
585 }
586
587 if (opthasval[optid]) {
588 if (*p) {
589 optval = p;
590 p += strlen(p);
591 } else if (--argc > 0) {
592 optval = *++argv;
593 } else {
594 fprintf(stderr, "%s: option '-%c' expects"
595 " an argument\n", PNAME, c);
596 return 1;
597 }
598 } else {
599 optval = NULL;
600 }
601 }
602
603 wordstart = 0;
604
605 /*
606 * Now actually process the option.
607 */
608 switch (optid) {
609 case OPT_HELP:
610 usage(stdout);
611 return 0;
612 case OPT_VERSION:
613 printf("FIXME: version();\n");
614 return 0;
615 case OPT_LICENCE:
616 {
617 extern const char *const licence[];
618 int i;
619
620 for (i = 0; licence[i]; i++)
621 fputs(licence[i], stdout);
622
623 return 0;
624 }
625 return 0;
626 case OPT_SCAN:
627 if (nactions >= actionsize) {
628 actionsize = nactions * 3 / 2 + 16;
629 actions = sresize(actions, actionsize, struct action);
630 }
631 actions[nactions].mode = SCAN;
632 actions[nactions].arg = optval;
633 nactions++;
634 break;
635 case OPT_SCANDUMP:
636 if (nactions >= actionsize) {
637 actionsize = nactions * 3 / 2 + 16;
638 actions = sresize(actions, actionsize, struct action);
639 }
640 actions[nactions].mode = SCANDUMP;
641 actions[nactions].arg = optval;
642 nactions++;
643 break;
644 case OPT_DUMP:
645 if (nactions >= actionsize) {
646 actionsize = nactions * 3 / 2 + 16;
647 actions = sresize(actions, actionsize, struct action);
648 }
649 actions[nactions].mode = DUMP;
650 actions[nactions].arg = NULL;
651 nactions++;
652 break;
653 case OPT_LOAD:
654 if (nactions >= actionsize) {
655 actionsize = nactions * 3 / 2 + 16;
656 actions = sresize(actions, actionsize, struct action);
657 }
658 actions[nactions].mode = LOAD;
659 actions[nactions].arg = NULL;
660 nactions++;
661 break;
662 case OPT_TEXT:
663 if (nactions >= actionsize) {
664 actionsize = nactions * 3 / 2 + 16;
665 actions = sresize(actions, actionsize, struct action);
666 }
667 actions[nactions].mode = TEXT;
668 actions[nactions].arg = optval;
669 nactions++;
670 break;
671 case OPT_HTML:
672 if (nactions >= actionsize) {
673 actionsize = nactions * 3 / 2 + 16;
674 actions = sresize(actions, actionsize, struct action);
675 }
676 actions[nactions].mode = HTML;
677 actions[nactions].arg = optval;
678 nactions++;
679 break;
680 case OPT_HTTPD:
681 if (nactions >= actionsize) {
682 actionsize = nactions * 3 / 2 + 16;
683 actions = sresize(actions, actionsize, struct action);
684 }
685 actions[nactions].mode = HTTPD;
686 actions[nactions].arg = NULL;
687 nactions++;
688 break;
689 case OPT_PROGRESS:
690 progress = 2;
691 break;
692 case OPT_NOPROGRESS:
693 progress = 0;
694 break;
695 case OPT_TTYPROGRESS:
696 progress = 1;
697 break;
698 case OPT_CROSSFS:
699 crossfs = 1;
700 break;
701 case OPT_NOCROSSFS:
702 crossfs = 0;
703 break;
704 case OPT_DIRATIME:
705 fakediratimes = 0;
706 break;
707 case OPT_NODIRATIME:
708 fakediratimes = 1;
709 break;
710 case OPT_DATAFILE:
711 filename = optval;
712 break;
713 case OPT_TQDEPTH:
714 tqdepth = atoi(optval);
715 break;
716 case OPT_MINAGE:
717 textcutoff = parse_age(now, optval);
718 break;
719 case OPT_AGERANGE:
720 if (!strcmp(optval, "auto")) {
721 htmlautoagerange = 1;
722 } else {
723 char *q = optval + strcspn(optval, "-:");
724 if (*q)
725 *q++ = '\0';
726 htmloldest = parse_age(now, optval);
727 htmlnewest = *q ? parse_age(now, q) : now;
728 htmlautoagerange = 0;
729 }
730 break;
731 case OPT_SERVERADDR:
732 {
733 char *port;
734 if (optval[0] == '[' &&
735 (port = strchr(optval, ']')) != NULL)
736 port++;
737 else
738 port = optval;
739 port += strcspn(port, ":");
740 if (port)
741 *port++ = '\0';
742 httpserveraddr = optval;
743 httpserverport = atoi(port);
744 }
745 break;
746 case OPT_AUTH:
747 if (!strcmp(optval, "magic"))
748 auth = HTTPD_AUTH_MAGIC;
749 else if (!strcmp(optval, "basic"))
750 auth = HTTPD_AUTH_BASIC;
751 else if (!strcmp(optval, "none"))
752 auth = HTTPD_AUTH_NONE;
753 else if (!strcmp(optval, "default"))
754 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
755 else if (!strcmp(optval, "help") ||
756 !strcmp(optval, "list")) {
757 printf(PNAME ": supported HTTP authentication types"
758 " are:\n"
759 " magic use Linux /proc/net/tcp to"
760 " determine owner of peer socket\n"
761 " basic HTTP Basic username and"
762 " password authentication\n"
763 " default use 'magic' if possible, "
764 " otherwise fall back to 'basic'\n"
765 " none unauthenticated HTTP (if"
766 " the data file is non-confidential)\n");
767 return 0;
768 } else {
769 fprintf(stderr, "%s: unrecognised authentication"
770 " type '%s'\n%*s options are 'magic',"
771 " 'basic', 'none', 'default'\n",
772 PNAME, optval, (int)strlen(PNAME), "");
773 return 1;
774 }
775 break;
776 case OPT_AUTHFILE:
777 case OPT_AUTHFD:
778 {
779 int fd;
780 char namebuf[40];
781 const char *name;
782 char *authbuf;
783 int authlen, authsize;
784 int ret;
785
786 if (optid == OPT_AUTHFILE) {
787 fd = open(optval, O_RDONLY);
788 if (fd < 0) {
789 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
790 optval, strerror(errno));
791 return 1;
792 }
793 name = optval;
794 } else {
795 fd = atoi(optval);
796 name = namebuf;
797 sprintf(namebuf, "fd %d", fd);
798 }
799
800 authlen = 0;
801 authsize = 256;
802 authbuf = snewn(authsize, char);
803 while ((ret = read(fd, authbuf+authlen,
804 authsize-authlen)) > 0) {
805 authlen += ret;
806 if ((authsize - authlen) < (authsize / 16)) {
807 authsize = authlen * 3 / 2 + 4096;
808 authbuf = sresize(authbuf, authsize, char);
809 }
810 }
811 if (ret < 0) {
812 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
813 name, strerror(errno));
814 return 1;
815 }
816 if (optid == OPT_AUTHFILE)
817 close(fd);
818 httpauthdata = authbuf;
819 }
820 break;
821 case OPT_INCLUDE:
822 case OPT_INCLUDEPATH:
823 case OPT_EXCLUDE:
824 case OPT_EXCLUDEPATH:
825 case OPT_PRUNE:
826 case OPT_PRUNEPATH:
827 if (ninex >= inexsize) {
828 inexsize = ninex * 3 / 2 + 16;
829 inex = sresize(inex, inexsize,
830 struct inclusion_exclusion);
831 }
832 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
833 optid == OPT_EXCLUDEPATH ||
834 optid == OPT_PRUNEPATH);
835 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
836 optid == OPT_INCLUDEPATH ? 1 :
837 optid == OPT_EXCLUDE ? 0 :
838 optid == OPT_EXCLUDEPATH ? 0 :
839 optid == OPT_PRUNE ? -1 :
840 /* optid == OPT_PRUNEPATH ? */ -1);
841 inex[ninex].wildcard = optval;
842 ninex++;
843 break;
844 }
845 }
846 } else {
847 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
848 return 1;
849 }
850 }
851
852 if (nactions == 0) {
853 usage(stderr);
854 return 1;
855 }
856
857 for (action = 0; action < nactions; action++) {
858 int mode = actions[action].mode;
859
860 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
861 const char *scandir = actions[action].arg;
862 if (mode == LOAD) {
863 char *buf = fgetline(stdin);
864 unsigned newpathsep;
865 buf[strcspn(buf, "\r\n")] = '\0';
866 if (1 != sscanf(buf, DUMPHDR "%x",
867 &newpathsep)) {
868 fprintf(stderr, "%s: header in dump file not recognised\n",
869 PNAME);
870 return 1;
871 }
872 pathsep = (char)newpathsep;
873 sfree(buf);
874 }
875
876 if (mode == SCAN || mode == LOAD) {
877 /*
878 * Prepare to write out the index file.
879 */
880 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
881 if (fd < 0) {
882 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
883 strerror(errno));
884 return 1;
885 }
886 if (fstat(fd, &st) < 0) {
887 perror(PNAME ": fstat");
888 return 1;
889 }
890 ctx->datafile_dev = st.st_dev;
891 ctx->datafile_ino = st.st_ino;
892 ctx->straight_to_dump = 0;
893 } else {
894 ctx->datafile_dev = -1;
895 ctx->datafile_ino = -1;
896 ctx->straight_to_dump = 1;
897 }
898
899 if (mode == SCAN || mode == SCANDUMP) {
900 if (stat(scandir, &st) < 0) {
901 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
902 strerror(errno));
903 return 1;
904 }
905 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
906 }
907
908 ctx->inex = inex;
909 ctx->ninex = ninex;
910 ctx->crossfs = crossfs;
911 ctx->fakeatimes = fakediratimes;
912
913 ctx->last_output_update = time(NULL);
914
915 /* progress==1 means report progress only if stderr is a tty */
916 if (progress == 1)
917 progress = isatty(2) ? 2 : 0;
918 ctx->progress = progress;
919 {
920 struct winsize ws;
921 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
922 ctx->progwidth = ws.ws_col - 1;
923 else
924 ctx->progwidth = 79;
925 }
926
927 if (mode == SCANDUMP)
928 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
929
930 /*
931 * Scan the directory tree, and write out the trie component
932 * of the data file.
933 */
934 if (mode != SCANDUMP) {
935 ctx->tb = triebuild_new(fd);
936 }
937 if (mode == LOAD) {
938 char *buf;
939 int line = 2;
940 while ((buf = fgetline(stdin)) != NULL) {
941 struct trie_file tf;
942 char *p, *q;
943
944 buf[strcspn(buf, "\r\n")] = '\0';
945
946 p = buf;
947 q = p;
948 while (*p && *p != ' ') p++;
949 if (!*p) {
950 fprintf(stderr, "%s: dump file line %d: expected at least"
951 " three fields\n", PNAME, line);
952 return 1;
953 }
954 *p++ = '\0';
955 tf.size = strtoull(q, NULL, 10);
956 q = p;
957 while (*p && *p != ' ') p++;
958 if (!*p) {
959 fprintf(stderr, "%s: dump file line %d: expected at least"
960 " three fields\n", PNAME, line);
961 return 1;
962 }
963 *p++ = '\0';
964 tf.atime = strtoull(q, NULL, 10);
965 q = buf;
966 while (*p) {
967 int c = *p;
968 if (*p == '%') {
969 int i;
970 p++;
971 c = 0;
972 for (i = 0; i < 2; i++) {
973 c *= 16;
974 if (*p >= '0' && *p <= '9')
975 c += *p - '0';
976 else if (*p >= 'A' && *p <= 'F')
977 c += *p - ('A' - 10);
978 else if (*p >= 'a' && *p <= 'f')
979 c += *p - ('a' - 10);
980 else {
981 fprintf(stderr, "%s: dump file line %d: unable"
982 " to parse hex escape\n", PNAME, line);
983 }
984 p++;
985 }
986 }
987 *q++ = c;
988 p++;
989 }
990 *q = '\0';
991 triebuild_add(ctx->tb, buf, &tf);
992 sfree(buf);
993 line++;
994 }
995 } else {
996 du(scandir, gotdata, ctx);
997 }
998 if (mode != SCANDUMP) {
999 count = triebuild_finish(ctx->tb);
1000 triebuild_free(ctx->tb);
1001
1002 if (ctx->progress) {
1003 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
1004 fflush(stderr);
1005 }
1006
1007 /*
1008 * Work out how much space the cumulative index trees
1009 * will take; enlarge the file, and memory-map it.
1010 */
1011 if (fstat(fd, &st) < 0) {
1012 perror(PNAME ": fstat");
1013 return 1;
1014 }
1015
1016 printf("Built pathname index, %d entries, %ju bytes\n", count,
1017 (intmax_t)st.st_size);
1018
1019 totalsize = index_compute_size(st.st_size, count);
1020
1021 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
1022 perror(PNAME ": lseek");
1023 return 1;
1024 }
1025 if (write(fd, "\0", 1) < 1) {
1026 perror(PNAME ": write");
1027 return 1;
1028 }
1029
1030 printf("Upper bound on index file size = %ju bytes\n",
1031 (intmax_t)totalsize);
1032
1033 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1034 if (!mappedfile) {
1035 perror(PNAME ": mmap");
1036 return 1;
1037 }
1038
1039 if (fakediratimes) {
1040 printf("Faking directory atimes\n");
1041 trie_fake_dir_atimes(mappedfile);
1042 }
1043
1044 printf("Building index\n");
1045 ib = indexbuild_new(mappedfile, st.st_size, count);
1046 tw = triewalk_new(mappedfile);
1047 while ((tf = triewalk_next(tw, NULL)) != NULL)
1048 indexbuild_add(ib, tf);
1049 triewalk_free(tw);
1050 realsize = indexbuild_realsize(ib);
1051 indexbuild_free(ib);
1052
1053 munmap(mappedfile, totalsize);
1054 ftruncate(fd, realsize);
1055 close(fd);
1056 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
1057 }
1058 } else if (mode == TEXT) {
1059 char *querydir = actions[action].arg;
1060 size_t pathlen;
1061
1062 fd = open(filename, O_RDONLY);
1063 if (fd < 0) {
1064 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1065 strerror(errno));
1066 return 1;
1067 }
1068 if (fstat(fd, &st) < 0) {
1069 perror(PNAME ": fstat");
1070 return 1;
1071 }
1072 totalsize = st.st_size;
1073 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1074 if (!mappedfile) {
1075 perror(PNAME ": mmap");
1076 return 1;
1077 }
1078 pathsep = trie_pathsep(mappedfile);
1079
1080 /*
1081 * Trim trailing slash, just in case.
1082 */
1083 pathlen = strlen(querydir);
1084 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1085 querydir[--pathlen] = '\0';
1086
1087 text_query(mappedfile, querydir, textcutoff, tqdepth);
1088 } else if (mode == HTML) {
1089 char *querydir = actions[action].arg;
1090 size_t pathlen;
1091 struct html_config cfg;
1092 unsigned long xi;
1093 char *html;
1094
1095 fd = open(filename, O_RDONLY);
1096 if (fd < 0) {
1097 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1098 strerror(errno));
1099 return 1;
1100 }
1101 if (fstat(fd, &st) < 0) {
1102 perror(PNAME ": fstat");
1103 return 1;
1104 }
1105 totalsize = st.st_size;
1106 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1107 if (!mappedfile) {
1108 perror(PNAME ": mmap");
1109 return 1;
1110 }
1111 pathsep = trie_pathsep(mappedfile);
1112
1113 /*
1114 * Trim trailing slash, just in case.
1115 */
1116 pathlen = strlen(querydir);
1117 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1118 querydir[--pathlen] = '\0';
1119
1120 xi = trie_before(mappedfile, querydir);
1121 cfg.format = NULL;
1122 cfg.autoage = htmlautoagerange;
1123 cfg.oldest = htmloldest;
1124 cfg.newest = htmlnewest;
1125 html = html_query(mappedfile, xi, &cfg);
1126 fputs(html, stdout);
1127 } else if (mode == DUMP) {
1128 size_t maxpathlen;
1129 char *buf;
1130
1131 fd = open(filename, O_RDONLY);
1132 if (fd < 0) {
1133 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1134 strerror(errno));
1135 return 1;
1136 }
1137 if (fstat(fd, &st) < 0) {
1138 perror(PNAME ": fstat");
1139 return 1;
1140 }
1141 totalsize = st.st_size;
1142 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1143 if (!mappedfile) {
1144 perror(PNAME ": mmap");
1145 return 1;
1146 }
1147 pathsep = trie_pathsep(mappedfile);
1148
1149 maxpathlen = trie_maxpathlen(mappedfile);
1150 buf = snewn(maxpathlen, char);
1151
1152 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
1153 tw = triewalk_new(mappedfile);
1154 while ((tf = triewalk_next(tw, buf)) != NULL)
1155 dump_line(buf, tf);
1156 triewalk_free(tw);
1157 } else if (mode == HTTPD) {
1158 struct html_config pcfg;
1159 struct httpd_config dcfg;
1160
1161 fd = open(filename, O_RDONLY);
1162 if (fd < 0) {
1163 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1164 strerror(errno));
1165 return 1;
1166 }
1167 if (fstat(fd, &st) < 0) {
1168 perror(PNAME ": fstat");
1169 return 1;
1170 }
1171 totalsize = st.st_size;
1172 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1173 if (!mappedfile) {
1174 perror(PNAME ": mmap");
1175 return 1;
1176 }
1177 pathsep = trie_pathsep(mappedfile);
1178
1179 dcfg.address = httpserveraddr;
1180 dcfg.port = httpserverport;
1181 dcfg.basicauthdata = httpauthdata;
1182 pcfg.format = NULL;
1183 pcfg.autoage = htmlautoagerange;
1184 pcfg.oldest = htmloldest;
1185 pcfg.newest = htmlnewest;
1186 run_httpd(mappedfile, auth, &dcfg, &pcfg);
1187 }
1188 }
1189
1190 return 0;
1191 }