50affaa9f529f5a871fb9097a129bc26360c65a3
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #include "agedu.h"
6
7 #include "du.h"
8 #include "trie.h"
9 #include "index.h"
10 #include "alloc.h"
11 #include "html.h"
12 #include "httpd.h"
13 #include "fgetline.h"
14
/*
 * Path separator. This global variable affects the behaviour of
 * various parts of the code when they need to deal with path
 * separators. The path separator appropriate to a particular data
 * set is encoded in the index file storing that data set; data
 * sets generated on Unix will of course have the default '/', but
 * foreign data sets are conceivable and must be handled correctly.
 *
 * The default set here is replaced in main() by the value read from
 * a dump file header when loading a dump, and by trie_pathsep()
 * when an existing index file is opened for a query.
 */
char pathsep = '/';
24
25 void fatal(const char *fmt, ...)
26 {
27 va_list ap;
28 fprintf(stderr, "%s: ", PNAME);
29 va_start(ap, fmt);
30 vfprintf(stderr, fmt, ap);
31 va_end(ap);
32 fprintf(stderr, "\n");
33 exit(1);
34 }
35
/*
 * One wildcard filtering rule given on the command line. Rules are
 * tried in the order they were given and the last one that matches
 * an entry decides what happens to it.
 */
struct inclusion_exclusion {
    /* Verdict if the rule matches: 1 include, 0 exclude, -1 prune. */
    int type;
    /* Pattern handed to fnmatch(). */
    const char *wildcard;
    /*
     * Nonzero: match the pattern against the whole pathname.
     * Zero: match only the part after the last path separator.
     */
    int path;
};
41
42 struct ctx {
43 triebuild *tb;
44 dev_t datafile_dev, filesystem_dev;
45 ino_t datafile_ino;
46 time_t last_output_update;
47 int progress, progwidth;
48 int straight_to_dump;
49 struct inclusion_exclusion *inex;
50 int ninex;
51 int crossfs;
52 int usemtime;
53 int fakeatimes;
54 };
55
56 static void dump_line(const char *pathname, const struct trie_file *tf)
57 {
58 const char *p;
59 printf("%llu %llu ", tf->size, tf->atime);
60 for (p = pathname; *p; p++) {
61 if (*p >= ' ' && *p < 127 && *p != '%')
62 putchar(*p);
63 else
64 printf("%%%02x", (unsigned char)*p);
65 }
66 putchar('\n');
67 }
68
69 static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
70 {
71 struct ctx *ctx = (struct ctx *)vctx;
72 struct trie_file file;
73 time_t t;
74 int i, include;
75 const char *filename;
76
77 /*
78 * Filter out our own data file.
79 */
80 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
81 return 0;
82
83 /*
84 * Don't cross the streams^W^Wany file system boundary.
85 */
86 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
87 return 0;
88
89 file.size = (unsigned long long)512 * st->st_blocks;
90 if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode)))
91 file.atime = st->st_mtime;
92 else
93 file.atime = st->st_atime;
94
95 /*
96 * Filter based on wildcards.
97 */
98 include = 1;
99 filename = strrchr(pathname, pathsep);
100 if (!filename)
101 filename = pathname;
102 else
103 filename++;
104 for (i = 0; i < ctx->ninex; i++) {
105 if (fnmatch(ctx->inex[i].wildcard,
106 ctx->inex[i].path ? pathname : filename, 0) == 0)
107 include = ctx->inex[i].type;
108 }
109 if (include == -1)
110 return 0; /* ignore this entry and any subdirs */
111 if (include == 0) {
112 /*
113 * Here we are supposed to be filtering an entry out, but
114 * still recursing into it if it's a directory. However,
115 * we can't actually leave out any directory whose
116 * subdirectories we then look at. So we cheat, in that
117 * case, by setting the size to zero.
118 */
119 if (!S_ISDIR(st->st_mode))
120 return 0; /* just ignore */
121 else
122 file.size = 0;
123 }
124
125 if (ctx->straight_to_dump)
126 dump_line(pathname, &file);
127 else
128 triebuild_add(ctx->tb, pathname, &file);
129
130 if (ctx->progress) {
131 t = time(NULL);
132 if (t != ctx->last_output_update) {
133 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
134 pathname);
135 fflush(stderr);
136 ctx->last_output_update = t;
137 }
138 }
139
140 return 1;
141 }
142
143 static void text_query(const void *mappedfile, const char *querydir,
144 time_t t, int depth)
145 {
146 size_t maxpathlen;
147 char *pathbuf;
148 unsigned long xi1, xi2;
149 unsigned long long s1, s2;
150
151 maxpathlen = trie_maxpathlen(mappedfile);
152 pathbuf = snewn(maxpathlen + 1, char);
153
154 /*
155 * We want to query everything between the supplied filename
156 * (inclusive) and that filename with a ^A on the end
157 * (exclusive). So find the x indices for each.
158 */
159 strcpy(pathbuf, querydir);
160 make_successor(pathbuf);
161 xi1 = trie_before(mappedfile, querydir);
162 xi2 = trie_before(mappedfile, pathbuf);
163
164 if (xi2 - xi1 == 1)
165 return; /* file, or empty dir => no display */
166
167 /*
168 * Now do the lookups in the age index.
169 */
170 s1 = index_query(mappedfile, xi1, t);
171 s2 = index_query(mappedfile, xi2, t);
172
173 if (s1 == s2)
174 return; /* no space taken up => no display */
175
176 if (depth > 0) {
177 /*
178 * Now scan for first-level subdirectories and report
179 * those too.
180 */
181 xi1++;
182 while (xi1 < xi2) {
183 trie_getpath(mappedfile, xi1, pathbuf);
184 text_query(mappedfile, pathbuf, t, depth-1);
185 make_successor(pathbuf);
186 xi1 = trie_before(mappedfile, pathbuf);
187 }
188 }
189
190 /* Display in units of 1Kb */
191 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
192 }
193
194 /*
195 * Largely frivolous way to define all my command-line options. I
196 * present here a parametric macro which declares a series of
197 * _logical_ option identifiers, and for each one declares zero or
198 * more short option characters and zero or more long option
199 * words. Then I repeatedly invoke that macro with its arguments
200 * defined to be various other macros, which allows me to
201 * variously:
202 *
203 * - define an enum allocating a distinct integer value to each
204 * logical option id
205 * - define a string consisting of precisely all the short option
206 * characters
207 * - define a string array consisting of all the long option
208 * strings
209 * - define (with help from auxiliary enums) integer arrays
210 * parallel to both of the above giving the logical option id
211 * for each physical short and long option
212 * - define an array indexed by logical option id indicating
213 * whether the option in question takes a value
214 * - define a function which prints out brief online help for all
215 * the options.
216 *
217 * It's not at all clear to me that this trickery is actually
218 * particularly _efficient_ - it still, after all, requires going
219 * linearly through the option list at run time and doing a
220 * strcmp, whereas in an ideal world I'd have liked the lists of
221 * long and short options to be pre-sorted so that a binary search
222 * or some other more efficient lookup was possible. (Not that
223 * asymptotic algorithmic complexity is remotely vital in option
224 * parsing, but if I were doing this in, say, Lisp or something
225 * with an equivalently powerful preprocessor then once I'd had
226 * the idea of preparing the option-parsing data structures at
227 * compile time I would probably have made the effort to prepare
228 * them _properly_. I could have Perl generate me a source file
229 * from some sort of description, I suppose, but that would seem
230 * like overkill. And in any case, it's more of a challenge to
231 * achieve as much as possible by cunning use of cpp and enum than
232 * to just write some sensible and logical code in a Turing-
233 * complete language. I said it was largely frivolous :-)
234 *
235 * This approach does have the virtue that it brings together the
236 * option ids, option spellings and help text into a single
237 * combined list and defines them all in exactly one place. If I
238 * want to add a new option, or a new spelling for an option, I
239 * only have to modify the main OPTHELP macro below and then add
240 * code to process the new logical id.
241 *
242 * (Though, really, even that isn't ideal, since it still involves
243 * modifying the source file in more than one place. In a
244 * _properly_ ideal world, I'd be able to interleave the option
245 * definitions with the code fragments that process them. And then
246 * not bother defining logical identifiers for them at all - those
247 * would be automatically generated, since I wouldn't have any
248 * need to specify them manually in another part of the code.)
249 *
250 * One other helpful consequence of the enum-based structure here
251 * is that it causes a compiler error if I accidentally try to
252 * define the same option (short or long) twice.
253 */
254
/*
 * The master option list. Each argument is itself a macro name, and
 * OPTHELP invokes them, in order, as follows:
 *
 *  - NOVAL(id) / VAL(id): start a new logical option `id' which
 *    takes no value / takes a value.
 *  - SHORT(c) / LONG(word): a short-option character / long-option
 *    word for the logical option most recently started. The option
 *    parser lets the user type an underscore in `word' as '_' or
 *    '-', or leave it out.
 *  - HELPPFX(str): heading to show at the left of the next help
 *    line.
 *  - HELPARG(str): name of the option's value, for the help text.
 *  - HELPLINE(str): a free-standing line of help text.
 *  - HELPOPT(str): the description of the current option; this also
 *    ends that option's help entry.
 *
 * The order of entries matters: it fixes the numbering of the
 * logical option ids and the layout of every table generated from
 * this list.
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
    NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \
        HELPOPT("remove the index file") \
    NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \
    NOVAL(LOAD) SHORT(L) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[most modes] specify index file") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
        HELPOPT("[--scan,--load] keep real atimes on directories") \
    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
        HELPOPT("[--scan,--load] fake atimes on directories") \
    NOVAL(MTIME) LONG(mtime) \
        HELPOPT("[--scan] use mtime instead of atime") \
    NOVAL(FULL) LONG(full_index) \
        HELPOPT("[--scan] index every file individually") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    VAL(TQDEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") HELPOPT("[--text] include only files older than this") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text") \
328
/*
 * Building blocks to pass as the arguments of the option-list
 * macro. Each one turns a single NOVAL/VAL/SHORT/LONG entry into
 * one element of an enum or array initialiser (or into nothing).
 */

/* Expand to nothing: used for the entry kinds a table doesn't need. */
#define IGNORE(x)
/* Declare the logical option id OPT_<x> as the next enum value. */
#define DEFENUM(x) OPT_ ## x,
/* Array elements 0 and 1, for the "takes a value" table. */
#define ZERO(x) 0,
#define ONE(x) 1,
/* The spelling of x as a string literal, as an array element... */
#define STRING(x) #x ,
/* ...or with no comma, so adjacent literals join into one string. */
#define STRINGNOCOMMA(x) #x

/*
 * Map each short option to its logical id. SHORTNEWOPT(id) resets
 * the enum counter to OPT_<id>. SHORTTHISOPT(c) then burns one enum
 * value on a scratch name and defines SHORTVAL_<c> as one less than
 * it, i.e. as the OPT_ value most recently set, which also leaves
 * the counter ready for a further spelling of the same option.
 */
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
/* Array element giving the logical id of short option x. */
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
/* One scratch enumerator per short option, used only for counting. */
#define SHORTTMP(x) SHORTtmp3_ ## x,

/* The same machinery again for long options. */
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
/*
 * One scratch enumerator per long option, used only for counting.
 * These use their own LONGtmp3_ prefix so that a long option spelt
 * the same as a short option character cannot collide with the
 * scratch names generated by SHORTTMP.
 */
#define LONGTMP(x) LONGtmp3_ ## x,
343
/*
 * The option list with all the help-text entries discarded, leaving
 * only the NOVAL/VAL/SHORT/LONG entries for building tables.
 */
#define OPTIONS(NOVAL, VAL, SHORT, LONG) \
    OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)

/* Logical option ids OPT_xxx, then NOPTIONS = how many there are. */
enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
/* One scratch enumerator per short option, so NSHORTOPTS counts them. */
enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
/* Likewise NLONGOPTS is the number of long option spellings. */
enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
/* Indexed by logical option id: 1 if the option takes a value, else 0. */
static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
/* Every short option character, joined into a single string. */
static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
/* Every long option word, in list order. */
static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
/* Define SHORTVAL_<c>: the logical option id that short option c maps to. */
enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
/* Define LONGVAL_<word>: the logical option id that long option maps to. */
enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
/* Parallel to shortopts: the logical option id of each short option. */
static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
/* Parallel to longopts: the logical option id of each long option. */
static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
357
358 static void usage(FILE *fp)
359 {
360 char longbuf[80];
361 const char *prefix, *shortopt, *longopt, *optarg;
362 int i, optex;
363
364 #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
365 #define HELPNOVAL(s) optex = 0;
366 #define HELPVAL(s) optex = 1;
367 #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
368 #define HELPLONG(s) if (!longopt) { \
369 strcpy(longbuf, "--" #s); longopt = longbuf; \
370 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
371 #define HELPPFX(s) prefix = s;
372 #define HELPARG(s) optarg = s;
373 #define HELPLINE(s) assert(optex == -1); \
374 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
375 HELPRESET;
376 #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
377 assert(shortopt || longopt); \
378 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
379 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
380 optarg?" ":"", optarg?optarg:""); \
381 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
382
383 HELPRESET;
384 OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
385 HELPPFX, HELPARG, HELPLINE, HELPOPT);
386
387 #undef HELPRESET
388 #undef HELPNOVAL
389 #undef HELPVAL
390 #undef HELPSHORT
391 #undef HELPLONG
392 #undef HELPPFX
393 #undef HELPARG
394 #undef HELPLINE
395 #undef HELPOPT
396 }
397
398 static time_t parse_age(time_t now, const char *agestr)
399 {
400 time_t t;
401 struct tm tm;
402 int nunits;
403 char unit[2];
404
405 t = now;
406
407 if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
408 fprintf(stderr, "%s: age specification should be a number followed by"
409 " one of d,w,m,y\n", PNAME);
410 exit(1);
411 }
412
413 if (unit[0] == 'd') {
414 t -= 86400 * nunits;
415 } else if (unit[0] == 'w') {
416 t -= 86400 * 7 * nunits;
417 } else {
418 int ym;
419
420 tm = *localtime(&t);
421 ym = tm.tm_year * 12 + tm.tm_mon;
422
423 if (unit[0] == 'm')
424 ym -= nunits;
425 else
426 ym -= 12 * nunits;
427
428 tm.tm_year = ym / 12;
429 tm.tm_mon = ym % 12;
430
431 t = mktime(&tm);
432 }
433
434 return t;
435 }
436
437 int main(int argc, char **argv)
438 {
439 int fd, count;
440 struct ctx actx, *ctx = &actx;
441 struct stat st;
442 off_t totalsize, realsize;
443 void *mappedfile;
444 triewalk *tw;
445 indexbuild *ib;
446 const struct trie_file *tf, *prevtf;
447 char *filename = PNAME ".dat";
448 int doing_opts = 1;
449 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE };
450 struct action {
451 int mode;
452 char *arg;
453 } *actions = NULL;
454 int nactions = 0, actionsize = 0, action;
455 time_t now = time(NULL);
456 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
457 int htmlautoagerange = 1;
458 const char *httpserveraddr = NULL;
459 int httpserverport = 0;
460 const char *httpauthdata = NULL;
461 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
462 int progress = 1;
463 struct inclusion_exclusion *inex = NULL;
464 int ninex = 0, inexsize = 0;
465 int crossfs = 0;
466 int tqdepth = 1;
467 int fakediratimes = 1;
468 int mtime = 0;
469 int fullindex = 0;
470
471 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
472 {
473 static const char *const optnames[NOPTIONS] = {
474 OPTIONS(STRING,STRING,IGNORE,IGNORE)
475 };
476 int i;
477 for (i = 0; i < NSHORTOPTS; i++)
478 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
479 opthasval[shortvals[i]] ? "value" : "no value");
480 for (i = 0; i < NLONGOPTS; i++)
481 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
482 opthasval[longvals[i]] ? "value" : "no value");
483 }
484 #endif
485
486 while (--argc > 0) {
487 char *p = *++argv;
488
489 if (doing_opts && *p == '-') {
490 int wordstart = 1;
491
492 if (!strcmp(p, "--")) {
493 doing_opts = 0;
494 continue;
495 }
496
497 p++;
498 while (*p) {
499 int optid = -1;
500 int i;
501 char *optval;
502
503 if (wordstart && *p == '-') {
504 /*
505 * GNU-style long option.
506 */
507 p++;
508 optval = strchr(p, '=');
509 if (optval)
510 *optval++ = '\0';
511
512 for (i = 0; i < NLONGOPTS; i++) {
513 const char *opt = longopts[i], *s = p;
514 int match = 1;
515 /*
516 * The underscores in the option names
517 * defined above may be given by the user
518 * as underscores or dashes, or omitted
519 * entirely.
520 */
521 while (*opt) {
522 if (*opt == '_') {
523 if (*s == '-' || *s == '_')
524 s++;
525 } else {
526 if (*opt != *s) {
527 match = 0;
528 break;
529 }
530 s++;
531 }
532 opt++;
533 }
534 if (match && !*s) {
535 optid = longvals[i];
536 break;
537 }
538 }
539
540 if (optid < 0) {
541 fprintf(stderr, "%s: unrecognised option '--%s'\n",
542 PNAME, p);
543 return 1;
544 }
545
546 if (!opthasval[optid]) {
547 if (optval) {
548 fprintf(stderr, "%s: unexpected argument to option"
549 " '--%s'\n", PNAME, p);
550 return 1;
551 }
552 } else {
553 if (!optval) {
554 if (--argc > 0) {
555 optval = *++argv;
556 } else {
557 fprintf(stderr, "%s: option '--%s' expects"
558 " an argument\n", PNAME, p);
559 return 1;
560 }
561 }
562 }
563
564 p += strlen(p); /* finished with this argument word */
565 } else {
566 /*
567 * Short option.
568 */
569 char c = *p++;
570
571 for (i = 0; i < NSHORTOPTS; i++)
572 if (c == shortopts[i]) {
573 optid = shortvals[i];
574 break;
575 }
576
577 if (optid < 0) {
578 fprintf(stderr, "%s: unrecognised option '-%c'\n",
579 PNAME, c);
580 return 1;
581 }
582
583 if (opthasval[optid]) {
584 if (*p) {
585 optval = p;
586 p += strlen(p);
587 } else if (--argc > 0) {
588 optval = *++argv;
589 } else {
590 fprintf(stderr, "%s: option '-%c' expects"
591 " an argument\n", PNAME, c);
592 return 1;
593 }
594 } else {
595 optval = NULL;
596 }
597 }
598
599 wordstart = 0;
600
601 /*
602 * Now actually process the option.
603 */
604 switch (optid) {
605 case OPT_HELP:
606 usage(stdout);
607 return 0;
608 case OPT_VERSION:
609 #ifdef PACKAGE_VERSION
610 printf("%s, revision %s\n", PNAME, PACKAGE_VERSION);
611 #else
612 printf("%s: version number not available when not built"
613 " via automake\n", PNAME);
614 #endif
615 return 0;
616 case OPT_LICENCE:
617 {
618 extern const char *const licence[];
619 int i;
620
621 for (i = 0; licence[i]; i++)
622 fputs(licence[i], stdout);
623
624 return 0;
625 }
626 return 0;
627 case OPT_SCAN:
628 if (nactions >= actionsize) {
629 actionsize = nactions * 3 / 2 + 16;
630 actions = sresize(actions, actionsize, struct action);
631 }
632 actions[nactions].mode = SCAN;
633 actions[nactions].arg = optval;
634 nactions++;
635 break;
636 case OPT_SCANDUMP:
637 if (nactions >= actionsize) {
638 actionsize = nactions * 3 / 2 + 16;
639 actions = sresize(actions, actionsize, struct action);
640 }
641 actions[nactions].mode = SCANDUMP;
642 actions[nactions].arg = optval;
643 nactions++;
644 break;
645 case OPT_DUMP:
646 if (nactions >= actionsize) {
647 actionsize = nactions * 3 / 2 + 16;
648 actions = sresize(actions, actionsize, struct action);
649 }
650 actions[nactions].mode = DUMP;
651 actions[nactions].arg = NULL;
652 nactions++;
653 break;
654 case OPT_LOAD:
655 if (nactions >= actionsize) {
656 actionsize = nactions * 3 / 2 + 16;
657 actions = sresize(actions, actionsize, struct action);
658 }
659 actions[nactions].mode = LOAD;
660 actions[nactions].arg = NULL;
661 nactions++;
662 break;
663 case OPT_TEXT:
664 if (nactions >= actionsize) {
665 actionsize = nactions * 3 / 2 + 16;
666 actions = sresize(actions, actionsize, struct action);
667 }
668 actions[nactions].mode = TEXT;
669 actions[nactions].arg = optval;
670 nactions++;
671 break;
672 case OPT_HTML:
673 if (nactions >= actionsize) {
674 actionsize = nactions * 3 / 2 + 16;
675 actions = sresize(actions, actionsize, struct action);
676 }
677 actions[nactions].mode = HTML;
678 actions[nactions].arg = optval;
679 nactions++;
680 break;
681 case OPT_HTTPD:
682 if (nactions >= actionsize) {
683 actionsize = nactions * 3 / 2 + 16;
684 actions = sresize(actions, actionsize, struct action);
685 }
686 actions[nactions].mode = HTTPD;
687 actions[nactions].arg = NULL;
688 nactions++;
689 break;
690 case OPT_REMOVE:
691 if (nactions >= actionsize) {
692 actionsize = nactions * 3 / 2 + 16;
693 actions = sresize(actions, actionsize, struct action);
694 }
695 actions[nactions].mode = REMOVE;
696 actions[nactions].arg = NULL;
697 nactions++;
698 break;
699 case OPT_PROGRESS:
700 progress = 2;
701 break;
702 case OPT_NOPROGRESS:
703 progress = 0;
704 break;
705 case OPT_TTYPROGRESS:
706 progress = 1;
707 break;
708 case OPT_CROSSFS:
709 crossfs = 1;
710 break;
711 case OPT_NOCROSSFS:
712 crossfs = 0;
713 break;
714 case OPT_DIRATIME:
715 fakediratimes = 0;
716 break;
717 case OPT_NODIRATIME:
718 fakediratimes = 1;
719 break;
720 case OPT_MTIME:
721 mtime = 1;
722 break;
723 case OPT_FULL:
724 fullindex = 1;
725 break;
726 case OPT_DATAFILE:
727 filename = optval;
728 break;
729 case OPT_TQDEPTH:
730 tqdepth = atoi(optval);
731 break;
732 case OPT_MINAGE:
733 textcutoff = parse_age(now, optval);
734 break;
735 case OPT_AGERANGE:
736 if (!strcmp(optval, "auto")) {
737 htmlautoagerange = 1;
738 } else {
739 char *q = optval + strcspn(optval, "-:");
740 if (*q)
741 *q++ = '\0';
742 htmloldest = parse_age(now, optval);
743 htmlnewest = *q ? parse_age(now, q) : now;
744 htmlautoagerange = 0;
745 }
746 break;
747 case OPT_SERVERADDR:
748 {
749 char *port;
750 if (optval[0] == '[' &&
751 (port = strchr(optval, ']')) != NULL)
752 port++;
753 else
754 port = optval;
755 port += strcspn(port, ":");
756 if (port)
757 *port++ = '\0';
758 httpserveraddr = optval;
759 httpserverport = atoi(port);
760 }
761 break;
762 case OPT_AUTH:
763 if (!strcmp(optval, "magic"))
764 auth = HTTPD_AUTH_MAGIC;
765 else if (!strcmp(optval, "basic"))
766 auth = HTTPD_AUTH_BASIC;
767 else if (!strcmp(optval, "none"))
768 auth = HTTPD_AUTH_NONE;
769 else if (!strcmp(optval, "default"))
770 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
771 else if (!strcmp(optval, "help") ||
772 !strcmp(optval, "list")) {
773 printf(PNAME ": supported HTTP authentication types"
774 " are:\n"
775 " magic use Linux /proc/net/tcp to"
776 " determine owner of peer socket\n"
777 " basic HTTP Basic username and"
778 " password authentication\n"
779 " default use 'magic' if possible, "
780 " otherwise fall back to 'basic'\n"
781 " none unauthenticated HTTP (if"
782 " the data file is non-confidential)\n");
783 return 0;
784 } else {
785 fprintf(stderr, "%s: unrecognised authentication"
786 " type '%s'\n%*s options are 'magic',"
787 " 'basic', 'none', 'default'\n",
788 PNAME, optval, (int)strlen(PNAME), "");
789 return 1;
790 }
791 break;
792 case OPT_AUTHFILE:
793 case OPT_AUTHFD:
794 {
795 int fd;
796 char namebuf[40];
797 const char *name;
798 char *authbuf;
799 int authlen, authsize;
800 int ret;
801
802 if (optid == OPT_AUTHFILE) {
803 fd = open(optval, O_RDONLY);
804 if (fd < 0) {
805 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
806 optval, strerror(errno));
807 return 1;
808 }
809 name = optval;
810 } else {
811 fd = atoi(optval);
812 name = namebuf;
813 sprintf(namebuf, "fd %d", fd);
814 }
815
816 authlen = 0;
817 authsize = 256;
818 authbuf = snewn(authsize, char);
819 while ((ret = read(fd, authbuf+authlen,
820 authsize-authlen)) > 0) {
821 authlen += ret;
822 if ((authsize - authlen) < (authsize / 16)) {
823 authsize = authlen * 3 / 2 + 4096;
824 authbuf = sresize(authbuf, authsize, char);
825 }
826 }
827 if (ret < 0) {
828 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
829 name, strerror(errno));
830 return 1;
831 }
832 if (optid == OPT_AUTHFILE)
833 close(fd);
834 httpauthdata = authbuf;
835 }
836 break;
837 case OPT_INCLUDE:
838 case OPT_INCLUDEPATH:
839 case OPT_EXCLUDE:
840 case OPT_EXCLUDEPATH:
841 case OPT_PRUNE:
842 case OPT_PRUNEPATH:
843 if (ninex >= inexsize) {
844 inexsize = ninex * 3 / 2 + 16;
845 inex = sresize(inex, inexsize,
846 struct inclusion_exclusion);
847 }
848 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
849 optid == OPT_EXCLUDEPATH ||
850 optid == OPT_PRUNEPATH);
851 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
852 optid == OPT_INCLUDEPATH ? 1 :
853 optid == OPT_EXCLUDE ? 0 :
854 optid == OPT_EXCLUDEPATH ? 0 :
855 optid == OPT_PRUNE ? -1 :
856 /* optid == OPT_PRUNEPATH ? */ -1);
857 inex[ninex].wildcard = optval;
858 ninex++;
859 break;
860 }
861 }
862 } else {
863 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
864 return 1;
865 }
866 }
867
868 if (nactions == 0) {
869 usage(stderr);
870 return 1;
871 }
872
873 for (action = 0; action < nactions; action++) {
874 int mode = actions[action].mode;
875
876 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
877 const char *scandir = actions[action].arg;
878
879 if (mode == LOAD) {
880 char *buf = fgetline(stdin);
881 unsigned newpathsep;
882 buf[strcspn(buf, "\r\n")] = '\0';
883 if (1 != sscanf(buf, DUMPHDR "%x",
884 &newpathsep)) {
885 fprintf(stderr, "%s: header in dump file not recognised\n",
886 PNAME);
887 return 1;
888 }
889 pathsep = (char)newpathsep;
890 sfree(buf);
891 }
892
893 if (mode == SCAN || mode == LOAD) {
894 /*
895 * Prepare to write out the index file.
896 */
897 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT,
898 S_IRUSR | S_IWUSR);
899 if (fd < 0) {
900 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
901 strerror(errno));
902 return 1;
903 }
904 if (fstat(fd, &st) < 0) {
905 perror(PNAME ": fstat");
906 return 1;
907 }
908 ctx->datafile_dev = st.st_dev;
909 ctx->datafile_ino = st.st_ino;
910 ctx->straight_to_dump = 0;
911 } else {
912 ctx->datafile_dev = -1;
913 ctx->datafile_ino = -1;
914 ctx->straight_to_dump = 1;
915 }
916
917 if (mode == SCAN || mode == SCANDUMP) {
918 if (stat(scandir, &st) < 0) {
919 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
920 strerror(errno));
921 return 1;
922 }
923 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
924 }
925
926 ctx->inex = inex;
927 ctx->ninex = ninex;
928 ctx->crossfs = crossfs;
929 ctx->fakeatimes = fakediratimes;
930 ctx->usemtime = mtime;
931
932 ctx->last_output_update = time(NULL);
933
934 /* progress==1 means report progress only if stderr is a tty */
935 if (progress == 1)
936 progress = isatty(2) ? 2 : 0;
937 ctx->progress = progress;
938 {
939 struct winsize ws;
940 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
941 ctx->progwidth = ws.ws_col - 1;
942 else
943 ctx->progwidth = 79;
944 }
945
946 if (mode == SCANDUMP)
947 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
948
949 /*
950 * Scan the directory tree, and write out the trie component
951 * of the data file.
952 */
953 if (mode != SCANDUMP) {
954 ctx->tb = triebuild_new(fd);
955 }
956 if (mode == LOAD) {
957 char *buf;
958 int line = 2;
959 while ((buf = fgetline(stdin)) != NULL) {
960 struct trie_file tf;
961 char *p, *q;
962
963 buf[strcspn(buf, "\r\n")] = '\0';
964
965 p = buf;
966 q = p;
967 while (*p && *p != ' ') p++;
968 if (!*p) {
969 fprintf(stderr, "%s: dump file line %d: expected at least"
970 " three fields\n", PNAME, line);
971 return 1;
972 }
973 *p++ = '\0';
974 tf.size = strtoull(q, NULL, 10);
975 q = p;
976 while (*p && *p != ' ') p++;
977 if (!*p) {
978 fprintf(stderr, "%s: dump file line %d: expected at least"
979 " three fields\n", PNAME, line);
980 return 1;
981 }
982 *p++ = '\0';
983 tf.atime = strtoull(q, NULL, 10);
984 q = buf;
985 while (*p) {
986 int c = *p;
987 if (*p == '%') {
988 int i;
989 p++;
990 c = 0;
991 for (i = 0; i < 2; i++) {
992 c *= 16;
993 if (*p >= '0' && *p <= '9')
994 c += *p - '0';
995 else if (*p >= 'A' && *p <= 'F')
996 c += *p - ('A' - 10);
997 else if (*p >= 'a' && *p <= 'f')
998 c += *p - ('a' - 10);
999 else {
1000 fprintf(stderr, "%s: dump file line %d: unable"
1001 " to parse hex escape\n", PNAME, line);
1002 }
1003 p++;
1004 }
1005 }
1006 *q++ = c;
1007 p++;
1008 }
1009 *q = '\0';
1010 triebuild_add(ctx->tb, buf, &tf);
1011 sfree(buf);
1012 line++;
1013 }
1014 } else {
1015 du(scandir, gotdata, ctx);
1016 }
1017 if (mode != SCANDUMP) {
1018 size_t maxpathlen;
1019 char *buf, *prevbuf;
1020
1021 count = triebuild_finish(ctx->tb);
1022 triebuild_free(ctx->tb);
1023
1024 if (ctx->progress) {
1025 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
1026 fflush(stderr);
1027 }
1028
1029 /*
1030 * Work out how much space the cumulative index trees
1031 * will take; enlarge the file, and memory-map it.
1032 */
1033 if (fstat(fd, &st) < 0) {
1034 perror(PNAME ": fstat");
1035 return 1;
1036 }
1037
1038 printf("Built pathname index, %d entries, %llu bytes\n", count,
1039 (unsigned long long)st.st_size);
1040
1041 totalsize = index_compute_size(st.st_size, count);
1042
1043 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
1044 perror(PNAME ": lseek");
1045 return 1;
1046 }
1047 if (write(fd, "\0", 1) < 1) {
1048 perror(PNAME ": write");
1049 return 1;
1050 }
1051
1052 printf("Upper bound on index file size = %llu bytes\n",
1053 (unsigned long long)totalsize);
1054
1055 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1056 if (!mappedfile) {
1057 perror(PNAME ": mmap");
1058 return 1;
1059 }
1060
1061 if (fakediratimes) {
1062 printf("Faking directory atimes\n");
1063 trie_fake_dir_atimes(mappedfile);
1064 }
1065
1066 printf("Building index\n");
1067 ib = indexbuild_new(mappedfile, st.st_size, count);
1068 maxpathlen = trie_maxpathlen(mappedfile);
1069 buf = snewn(maxpathlen, char);
1070 prevbuf = snewn(maxpathlen, char);
1071 tw = triewalk_new(mappedfile);
1072 prevbuf[0] = '\0';
1073 tf = triewalk_next(tw, buf);
1074 assert(tf);
1075 while (1) {
1076 int i;
1077
1078 /*
1079 * Get the next file from the index. So we are
1080 * currently holding, and have not yet
1081 * indexed, prevtf (with pathname prevbuf) and
1082 * tf (with pathname buf).
1083 */
1084 prevtf = tf;
1085 memcpy(prevbuf, buf, maxpathlen);
1086 tf = triewalk_next(tw, buf);
1087
1088 if (!tf)
1089 buf[0] = '\0';
1090
1091 /*
1092 * Find the first differing character position
1093 * between our two pathnames.
1094 */
1095 for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++);
1096
1097 /*
1098 * If prevbuf was a directory name and buf is
1099 * something inside that directory, then
1100 * trie_before() will be called on prevbuf
1101 * itself. Hence we must drop a tag before it,
1102 * so that the resulting index is usable.
1103 */
1104 if ((!prevbuf[i] && (buf[i] == pathsep ||
1105 (i > 0 && buf[i-1] == pathsep))))
1106 indexbuild_tag(ib);
1107
1108 /*
1109 * Add prevtf to the index.
1110 */
1111 indexbuild_add(ib, prevtf);
1112
1113 if (!tf) {
1114 /*
1115 * Drop an unconditional final tag, and
1116 * get out of this loop.
1117 */
1118 indexbuild_tag(ib);
1119 break;
1120 }
1121
1122 /*
1123 * In full-index mode, index everything.
1124 */
1125 if (fullindex)
1126 indexbuild_tag(ib);
1127
1128 /*
1129 * If prevbuf was a filename inside some
1130 * directory which buf is outside, then
1131 * trie_before() will be called on some
1132 * pathname either equal to buf or epsilon
1133 * less than it. Either way, we're going to
1134 * need to drop a tag after prevtf.
1135 */
1136 if (strchr(prevbuf+i, pathsep) || !tf)
1137 indexbuild_tag(ib);
1138 }
1139
1140 triewalk_free(tw);
1141 realsize = indexbuild_realsize(ib);
1142 indexbuild_free(ib);
1143
1144 munmap(mappedfile, totalsize);
1145 ftruncate(fd, realsize);
1146 close(fd);
1147 printf("Actual index file size = %llu bytes\n",
1148 (unsigned long long)realsize);
1149 }
1150 } else if (mode == TEXT) {
1151 char *querydir = actions[action].arg;
1152 size_t pathlen;
1153
1154 fd = open(filename, O_RDONLY);
1155 if (fd < 0) {
1156 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1157 strerror(errno));
1158 return 1;
1159 }
1160 if (fstat(fd, &st) < 0) {
1161 perror(PNAME ": fstat");
1162 return 1;
1163 }
1164 totalsize = st.st_size;
1165 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1166 if (!mappedfile) {
1167 perror(PNAME ": mmap");
1168 return 1;
1169 }
1170 pathsep = trie_pathsep(mappedfile);
1171
1172 /*
1173 * Trim trailing slash, just in case.
1174 */
1175 pathlen = strlen(querydir);
1176 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1177 querydir[--pathlen] = '\0';
1178
1179 text_query(mappedfile, querydir, textcutoff, tqdepth);
1180
1181 munmap(mappedfile, totalsize);
1182 } else if (mode == HTML) {
1183 char *querydir = actions[action].arg;
1184 size_t pathlen;
1185 struct html_config cfg;
1186 unsigned long xi;
1187 char *html;
1188
1189 fd = open(filename, O_RDONLY);
1190 if (fd < 0) {
1191 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1192 strerror(errno));
1193 return 1;
1194 }
1195 if (fstat(fd, &st) < 0) {
1196 perror(PNAME ": fstat");
1197 return 1;
1198 }
1199 totalsize = st.st_size;
1200 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1201 if (!mappedfile) {
1202 perror(PNAME ": mmap");
1203 return 1;
1204 }
1205 pathsep = trie_pathsep(mappedfile);
1206
1207 /*
1208 * Trim trailing slash, just in case.
1209 */
1210 pathlen = strlen(querydir);
1211 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1212 querydir[--pathlen] = '\0';
1213
1214 xi = trie_before(mappedfile, querydir);
1215 cfg.format = NULL;
1216 cfg.autoage = htmlautoagerange;
1217 cfg.oldest = htmloldest;
1218 cfg.newest = htmlnewest;
1219 html = html_query(mappedfile, xi, &cfg);
1220 fputs(html, stdout);
1221
1222 munmap(mappedfile, totalsize);
1223 } else if (mode == DUMP) {
1224 size_t maxpathlen;
1225 char *buf;
1226
1227 fd = open(filename, O_RDONLY);
1228 if (fd < 0) {
1229 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1230 strerror(errno));
1231 return 1;
1232 }
1233 if (fstat(fd, &st) < 0) {
1234 perror(PNAME ": fstat");
1235 return 1;
1236 }
1237 totalsize = st.st_size;
1238 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1239 if (!mappedfile) {
1240 perror(PNAME ": mmap");
1241 return 1;
1242 }
1243 pathsep = trie_pathsep(mappedfile);
1244
1245 maxpathlen = trie_maxpathlen(mappedfile);
1246 buf = snewn(maxpathlen, char);
1247
1248 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
1249 tw = triewalk_new(mappedfile);
1250 while ((tf = triewalk_next(tw, buf)) != NULL)
1251 dump_line(buf, tf);
1252 triewalk_free(tw);
1253
1254 munmap(mappedfile, totalsize);
1255 } else if (mode == HTTPD) {
1256 struct html_config pcfg;
1257 struct httpd_config dcfg;
1258
1259 fd = open(filename, O_RDONLY);
1260 if (fd < 0) {
1261 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1262 strerror(errno));
1263 return 1;
1264 }
1265 if (fstat(fd, &st) < 0) {
1266 perror(PNAME ": fstat");
1267 return 1;
1268 }
1269 totalsize = st.st_size;
1270 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1271 if (!mappedfile) {
1272 perror(PNAME ": mmap");
1273 return 1;
1274 }
1275 pathsep = trie_pathsep(mappedfile);
1276
1277 dcfg.address = httpserveraddr;
1278 dcfg.port = httpserverport;
1279 dcfg.basicauthdata = httpauthdata;
1280 pcfg.format = NULL;
1281 pcfg.autoage = htmlautoagerange;
1282 pcfg.oldest = htmloldest;
1283 pcfg.newest = htmlnewest;
1284 run_httpd(mappedfile, auth, &dcfg, &pcfg);
1285 munmap(mappedfile, totalsize);
1286 } else if (mode == REMOVE) {
1287 if (remove(filename) < 0) {
1288 fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename,
1289 strerror(errno));
1290 return 1;
1291 }
1292 }
1293 }
1294
1295 return 0;
1296 }