99fa1f8a75f5ae5b71fc2d6c5df7e83d89645d34
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #include "agedu.h"
6
7 #include "du.h"
8 #include "trie.h"
9 #include "index.h"
10 #include "alloc.h"
11 #include "html.h"
12 #include "httpd.h"
13 #include "fgetline.h"
14
15 /*
16 * Path separator. This global variable affects the behaviour of
17 * various parts of the code when they need to deal with path
18 * separators. The path separator appropriate to a particular data
19 * set is encoded in the index file storing that data set; data
20 * sets generated on Unix will of course have the default '/', but
21 * foreign data sets are conceivable and must be handled correctly.
22 */
23 char pathsep = '/';
24
25 void fatal(const char *fmt, ...)
26 {
27 va_list ap;
28 fprintf(stderr, "%s: ", PNAME);
29 va_start(ap, fmt);
30 vfprintf(stderr, fmt, ap);
31 va_end(ap);
32 fprintf(stderr, "\n");
33 exit(1);
34 }
35
/*
 * One wildcard filter rule given on the command line (--include,
 * --exclude, --prune and their *-path variants). Rules are applied in
 * order during a scan and the last one that matches wins.
 */
struct inclusion_exclusion {
    int type;                 /* 1 = include, 0 = exclude, -1 = prune */
    const char *wildcard;     /* pattern handed to fnmatch() */
    int path;                 /* nonzero: match the whole pathname;
                               * zero: match only the final component */
};
41
42 struct ctx {
43 triebuild *tb;
44 dev_t datafile_dev, filesystem_dev;
45 ino_t datafile_ino;
46 time_t last_output_update;
47 int progress, progwidth;
48 int straight_to_dump;
49 struct inclusion_exclusion *inex;
50 int ninex;
51 int crossfs;
52 int usemtime;
53 int fakeatimes;
54 };
55
56 static void dump_line(const char *pathname, const struct trie_file *tf)
57 {
58 const char *p;
59 printf("%llu %llu ", tf->size, tf->atime);
60 for (p = pathname; *p; p++) {
61 if (*p >= ' ' && *p < 127 && *p != '%')
62 putchar(*p);
63 else
64 printf("%%%02x", (unsigned char)*p);
65 }
66 putchar('\n');
67 }
68
69 static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
70 {
71 struct ctx *ctx = (struct ctx *)vctx;
72 struct trie_file file;
73 time_t t;
74 int i, include;
75 const char *filename;
76
77 /*
78 * Filter out our own data file.
79 */
80 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
81 return 0;
82
83 /*
84 * Don't cross the streams^W^Wany file system boundary.
85 */
86 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
87 return 0;
88
89 file.size = (unsigned long long)512 * st->st_blocks;
90 if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode)))
91 file.atime = st->st_mtime;
92 else
93 file.atime = max(st->st_mtime, st->st_atime);
94
95 /*
96 * Filter based on wildcards.
97 */
98 include = 1;
99 filename = strrchr(pathname, pathsep);
100 if (!filename)
101 filename = pathname;
102 else
103 filename++;
104 for (i = 0; i < ctx->ninex; i++) {
105 if (fnmatch(ctx->inex[i].wildcard,
106 ctx->inex[i].path ? pathname : filename, 0) == 0)
107 include = ctx->inex[i].type;
108 }
109 if (include == -1)
110 return 0; /* ignore this entry and any subdirs */
111 if (include == 0) {
112 /*
113 * Here we are supposed to be filtering an entry out, but
114 * still recursing into it if it's a directory. However,
115 * we can't actually leave out any directory whose
116 * subdirectories we then look at. So we cheat, in that
117 * case, by setting the size to zero.
118 */
119 if (!S_ISDIR(st->st_mode))
120 return 0; /* just ignore */
121 else
122 file.size = 0;
123 }
124
125 if (ctx->straight_to_dump)
126 dump_line(pathname, &file);
127 else
128 triebuild_add(ctx->tb, pathname, &file);
129
130 if (ctx->progress) {
131 t = time(NULL);
132 if (t != ctx->last_output_update) {
133 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
134 pathname);
135 fflush(stderr);
136 ctx->last_output_update = t;
137 }
138 }
139
140 return 1;
141 }
142
143 static void scan_error(void *vctx, const char *fmt, ...)
144 {
145 struct ctx *ctx = (struct ctx *)vctx;
146 va_list ap;
147
148 if (ctx->progress) {
149 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
150 fflush(stderr);
151 }
152
153 fprintf(stderr, "%s: ", PNAME);
154 va_start(ap, fmt);
155 vfprintf(stderr, fmt, ap);
156 va_end(ap);
157
158 ctx->last_output_update--; /* force a progress report next time */
159 }
160
161 static void text_query(const void *mappedfile, const char *querydir,
162 time_t t, int depth)
163 {
164 size_t maxpathlen;
165 char *pathbuf;
166 unsigned long xi1, xi2;
167 unsigned long long s1, s2;
168
169 maxpathlen = trie_maxpathlen(mappedfile);
170 pathbuf = snewn(maxpathlen + 1, char);
171
172 /*
173 * We want to query everything between the supplied filename
174 * (inclusive) and that filename with a ^A on the end
175 * (exclusive). So find the x indices for each.
176 */
177 strcpy(pathbuf, querydir);
178 make_successor(pathbuf);
179 xi1 = trie_before(mappedfile, querydir);
180 xi2 = trie_before(mappedfile, pathbuf);
181
182 if (xi2 - xi1 == 1)
183 return; /* file, or empty dir => no display */
184
185 /*
186 * Now do the lookups in the age index.
187 */
188 s1 = index_query(mappedfile, xi1, t);
189 s2 = index_query(mappedfile, xi2, t);
190
191 if (s1 == s2)
192 return; /* no space taken up => no display */
193
194 if (depth > 0) {
195 /*
196 * Now scan for first-level subdirectories and report
197 * those too.
198 */
199 xi1++;
200 while (xi1 < xi2) {
201 trie_getpath(mappedfile, xi1, pathbuf);
202 text_query(mappedfile, pathbuf, t, depth-1);
203 make_successor(pathbuf);
204 xi1 = trie_before(mappedfile, pathbuf);
205 }
206 }
207
208 /* Display in units of 1Kb */
209 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
210 }
211
212 /*
213 * Largely frivolous way to define all my command-line options. I
214 * present here a parametric macro which declares a series of
215 * _logical_ option identifiers, and for each one declares zero or
216 * more short option characters and zero or more long option
217 * words. Then I repeatedly invoke that macro with its arguments
218 * defined to be various other macros, which allows me to
219 * variously:
220 *
221 * - define an enum allocating a distinct integer value to each
222 * logical option id
223 * - define a string consisting of precisely all the short option
224 * characters
225 * - define a string array consisting of all the long option
226 * strings
227 * - define (with help from auxiliary enums) integer arrays
228 * parallel to both of the above giving the logical option id
229 * for each physical short and long option
230 * - define an array indexed by logical option id indicating
231 * whether the option in question takes a value
232 * - define a function which prints out brief online help for all
233 * the options.
234 *
235 * It's not at all clear to me that this trickery is actually
236 * particularly _efficient_ - it still, after all, requires going
237 * linearly through the option list at run time and doing a
238 * strcmp, whereas in an ideal world I'd have liked the lists of
239 * long and short options to be pre-sorted so that a binary search
240 * or some other more efficient lookup was possible. (Not that
241 * asymptotic algorithmic complexity is remotely vital in option
242 * parsing, but if I were doing this in, say, Lisp or something
243 * with an equivalently powerful preprocessor then once I'd had
244 * the idea of preparing the option-parsing data structures at
245 * compile time I would probably have made the effort to prepare
246 * them _properly_. I could have Perl generate me a source file
247 * from some sort of description, I suppose, but that would seem
248 * like overkill. And in any case, it's more of a challenge to
249 * achieve as much as possible by cunning use of cpp and enum than
250 * to just write some sensible and logical code in a Turing-
251 * complete language. I said it was largely frivolous :-)
252 *
253 * This approach does have the virtue that it brings together the
254 * option ids, option spellings and help text into a single
255 * combined list and defines them all in exactly one place. If I
256 * want to add a new option, or a new spelling for an option, I
257 * only have to modify the main OPTHELP macro below and then add
258 * code to process the new logical id.
259 *
260 * (Though, really, even that isn't ideal, since it still involves
261 * modifying the source file in more than one place. In a
262 * _properly_ ideal world, I'd be able to interleave the option
263 * definitions with the code fragments that process them. And then
264 * not bother defining logical identifiers for them at all - those
265 * would be automatically generated, since I wouldn't have any
266 * need to specify them manually in another part of the code.)
267 *
268 * One other helpful consequence of the enum-based structure here
269 * is that it causes a compiler error if I accidentally try to
270 * define the same option (short or long) twice.
271 */
272
/*
 * Master option list. Each parameter is itself a macro name, applied
 * to the items of its kind in the order they appear below:
 *
 *  - NOVAL(id) / VAL(id): start a new logical option 'id' which
 *    respectively does not / does take a value
 *  - SHORT(c) / LONG(word): one short or long spelling of the option
 *    most recently started
 *  - HELPPFX(str): heading printed before the next help line
 *  - HELPARG(str): placeholder name for the option's value in help
 *  - HELPLINE(str): free-standing line of help text
 *  - HELPOPT(str): description of the option most recently started
 *
 * Underscores in LONG() words may be typed by the user as '-' or '_'
 * or left out, and are shown as '-' in the help output.
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
    NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \
        HELPOPT("remove the index file") \
    NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \
    NOVAL(LOAD) SHORT(L) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[most modes] specify index file") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
        HELPOPT("[--scan,--load] keep real atimes on directories") \
    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
        HELPOPT("[--scan,--load] fake atimes on directories") \
    NOVAL(MTIME) LONG(mtime) \
        HELPOPT("[--scan] use mtime instead of atime") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    VAL(TQDEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") HELPOPT("[--text] include only files older than this") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text") \

345 #define IGNORE(x)
346 #define DEFENUM(x) OPT_ ## x,
347 #define ZERO(x) 0,
348 #define ONE(x) 1,
349 #define STRING(x) #x ,
350 #define STRINGNOCOMMA(x) #x
351 #define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
352 #define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
353 #define SHORTOPTVAL(x) SHORTVAL_ ## x,
354 #define SHORTTMP(x) SHORTtmp3_ ## x,
355 #define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
356 #define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
357 #define LONGOPTVAL(x) LONGVAL_ ## x,
358 #define LONGTMP(x) SHORTtmp3_ ## x,
359
360 #define OPTIONS(NOVAL, VAL, SHORT, LONG) \
361 OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)
362
363 enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
364 enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
365 enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
366 static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
367 static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
368 static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
369 enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
370 enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
371 static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
372 static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
373
/*
 * Print the brief online help to 'fp'.
 *
 * The text is generated by expanding OPTHELP with the local HELP*
 * macros below. These accumulate the pieces of one help entry in
 * local variables as the option list is walked, and print the entry
 * when a HELPLINE or HELPOPT item is reached.
 */
static void usage(FILE *fp)
{
    char longbuf[80];
    const char *prefix, *shortopt, *longopt, *optarg;
    int i, optex;

    /*
     * Clear the accumulated state. optex is -1 when no option has
     * been started, 0 for an option without a value, 1 for one
     * with a value.
     */
#define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
#define HELPNOVAL(s) optex = 0;
#define HELPVAL(s) optex = 1;
    /* Only the first short and first long spelling are displayed. */
#define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
    /* The long spelling is shown with underscores turned into dashes. */
#define HELPLONG(s) if (!longopt) { \
    strcpy(longbuf, "--" #s); longopt = longbuf; \
    for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
#define HELPPFX(s) prefix = s;
#define HELPARG(s) optarg = s;
    /*
     * Free-standing line: the pending heading (if any) in a
     * 7-column field followed by ':', then the text.
     */
#define HELPLINE(s) assert(optex == -1); \
    fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
    HELPRESET;
    /*
     * Option line: heading field, spellings and value placeholder,
     * then padding out to 32 columns before the description. The
     * asserts check that a value placeholder was given exactly when
     * the option takes a value, and that it has some spelling.
     */
#define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
    assert(shortopt || longopt); \
    i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
                shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
                optarg?" ":"", optarg?optarg:""); \
    fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;

    HELPRESET;
    OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
            HELPPFX, HELPARG, HELPLINE, HELPOPT);

#undef HELPRESET
#undef HELPNOVAL
#undef HELPVAL
#undef HELPSHORT
#undef HELPLONG
#undef HELPPFX
#undef HELPARG
#undef HELPLINE
#undef HELPOPT
}
413
/*
 * Convert an age specification into an absolute time.
 *
 * 'agestr' is a decimal number followed by a unit letter: d (days),
 * w (weeks), m (months) or y (years), in either case. The result is
 * 'now' moved back by that amount. Days and weeks are fixed numbers
 * of seconds; months and years are applied to the broken-down local
 * calendar date and converted back with mktime().
 *
 * On a malformed specification an error is printed to stderr and the
 * program exits with status 1.
 */
static time_t parse_age(time_t now, const char *agestr)
{
    time_t t;
    struct tm tm;
    int nunits;
    char unit[2];

    t = now;

    if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
        fprintf(stderr, "%s: age specification should be a number followed by"
                " one of d,w,m,y\n", PNAME);
        exit(1);
    }

    /*
     * The scan set above accepts both cases of each unit letter, so
     * both must be recognised here; otherwise an upper-case D, W
     * or M would fall through and be treated as years.
     */
    switch (unit[0]) {
      case 'd':
      case 'D':
        /* Do the arithmetic in time_t, not int, to avoid overflow. */
        t -= (time_t)86400 * nunits;
        break;
      case 'w':
      case 'W':
        t -= (time_t)86400 * 7 * nunits;
        break;
      default:
        {
            int ym;

            tm = *localtime(&t);
            ym = tm.tm_year * 12 + tm.tm_mon;

            if (unit[0] == 'm' || unit[0] == 'M')
                ym -= nunits;
            else
                ym -= 12 * nunits;

            tm.tm_year = ym / 12;
            tm.tm_mon = ym % 12;

            /*
             * The target date may fall on the other side of a
             * daylight-saving change from 'now', so let mktime()
             * decide whether DST applies rather than reusing
             * today's setting.
             */
            tm.tm_isdst = -1;

            t = mktime(&tm);
        }
        break;
    }

    return t;
}
452
453 int main(int argc, char **argv)
454 {
455 int fd, count;
456 struct ctx actx, *ctx = &actx;
457 struct stat st;
458 off_t totalsize, realsize;
459 void *mappedfile;
460 triewalk *tw;
461 indexbuild *ib;
462 const struct trie_file *tf, *prevtf;
463 char *filename = PNAME ".dat";
464 int doing_opts = 1;
465 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE };
466 struct action {
467 int mode;
468 char *arg;
469 } *actions = NULL;
470 int nactions = 0, actionsize = 0, action;
471 time_t now = time(NULL);
472 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
473 int htmlautoagerange = 1;
474 const char *httpserveraddr = NULL;
475 int httpserverport = 0;
476 const char *httpauthdata = NULL;
477 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
478 int progress = 1;
479 struct inclusion_exclusion *inex = NULL;
480 int ninex = 0, inexsize = 0;
481 int crossfs = 0;
482 int tqdepth = 1;
483 int fakediratimes = 1;
484 int mtime = 0;
485
486 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
487 {
488 static const char *const optnames[NOPTIONS] = {
489 OPTIONS(STRING,STRING,IGNORE,IGNORE)
490 };
491 int i;
492 for (i = 0; i < NSHORTOPTS; i++)
493 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
494 opthasval[shortvals[i]] ? "value" : "no value");
495 for (i = 0; i < NLONGOPTS; i++)
496 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
497 opthasval[longvals[i]] ? "value" : "no value");
498 }
499 #endif
500
501 while (--argc > 0) {
502 char *p = *++argv;
503
504 if (doing_opts && *p == '-') {
505 int wordstart = 1;
506
507 if (!strcmp(p, "--")) {
508 doing_opts = 0;
509 continue;
510 }
511
512 p++;
513 while (*p) {
514 int optid = -1;
515 int i;
516 char *optval;
517
518 if (wordstart && *p == '-') {
519 /*
520 * GNU-style long option.
521 */
522 p++;
523 optval = strchr(p, '=');
524 if (optval)
525 *optval++ = '\0';
526
527 for (i = 0; i < NLONGOPTS; i++) {
528 const char *opt = longopts[i], *s = p;
529 int match = 1;
530 /*
531 * The underscores in the option names
532 * defined above may be given by the user
533 * as underscores or dashes, or omitted
534 * entirely.
535 */
536 while (*opt) {
537 if (*opt == '_') {
538 if (*s == '-' || *s == '_')
539 s++;
540 } else {
541 if (*opt != *s) {
542 match = 0;
543 break;
544 }
545 s++;
546 }
547 opt++;
548 }
549 if (match && !*s) {
550 optid = longvals[i];
551 break;
552 }
553 }
554
555 if (optid < 0) {
556 fprintf(stderr, "%s: unrecognised option '--%s'\n",
557 PNAME, p);
558 return 1;
559 }
560
561 if (!opthasval[optid]) {
562 if (optval) {
563 fprintf(stderr, "%s: unexpected argument to option"
564 " '--%s'\n", PNAME, p);
565 return 1;
566 }
567 } else {
568 if (!optval) {
569 if (--argc > 0) {
570 optval = *++argv;
571 } else {
572 fprintf(stderr, "%s: option '--%s' expects"
573 " an argument\n", PNAME, p);
574 return 1;
575 }
576 }
577 }
578
579 p += strlen(p); /* finished with this argument word */
580 } else {
581 /*
582 * Short option.
583 */
584 char c = *p++;
585
586 for (i = 0; i < NSHORTOPTS; i++)
587 if (c == shortopts[i]) {
588 optid = shortvals[i];
589 break;
590 }
591
592 if (optid < 0) {
593 fprintf(stderr, "%s: unrecognised option '-%c'\n",
594 PNAME, c);
595 return 1;
596 }
597
598 if (opthasval[optid]) {
599 if (*p) {
600 optval = p;
601 p += strlen(p);
602 } else if (--argc > 0) {
603 optval = *++argv;
604 } else {
605 fprintf(stderr, "%s: option '-%c' expects"
606 " an argument\n", PNAME, c);
607 return 1;
608 }
609 } else {
610 optval = NULL;
611 }
612 }
613
614 wordstart = 0;
615
616 /*
617 * Now actually process the option.
618 */
619 switch (optid) {
620 case OPT_HELP:
621 usage(stdout);
622 return 0;
623 case OPT_VERSION:
624 #ifdef PACKAGE_VERSION
625 printf("%s, revision %s\n", PNAME, PACKAGE_VERSION);
626 #else
627 printf("%s: version number not available when not built"
628 " via automake\n", PNAME);
629 #endif
630 return 0;
631 case OPT_LICENCE:
632 {
633 extern const char *const licence[];
634 int i;
635
636 for (i = 0; licence[i]; i++)
637 fputs(licence[i], stdout);
638
639 return 0;
640 }
641 return 0;
642 case OPT_SCAN:
643 if (nactions >= actionsize) {
644 actionsize = nactions * 3 / 2 + 16;
645 actions = sresize(actions, actionsize, struct action);
646 }
647 actions[nactions].mode = SCAN;
648 actions[nactions].arg = optval;
649 nactions++;
650 break;
651 case OPT_SCANDUMP:
652 if (nactions >= actionsize) {
653 actionsize = nactions * 3 / 2 + 16;
654 actions = sresize(actions, actionsize, struct action);
655 }
656 actions[nactions].mode = SCANDUMP;
657 actions[nactions].arg = optval;
658 nactions++;
659 break;
660 case OPT_DUMP:
661 if (nactions >= actionsize) {
662 actionsize = nactions * 3 / 2 + 16;
663 actions = sresize(actions, actionsize, struct action);
664 }
665 actions[nactions].mode = DUMP;
666 actions[nactions].arg = NULL;
667 nactions++;
668 break;
669 case OPT_LOAD:
670 if (nactions >= actionsize) {
671 actionsize = nactions * 3 / 2 + 16;
672 actions = sresize(actions, actionsize, struct action);
673 }
674 actions[nactions].mode = LOAD;
675 actions[nactions].arg = NULL;
676 nactions++;
677 break;
678 case OPT_TEXT:
679 if (nactions >= actionsize) {
680 actionsize = nactions * 3 / 2 + 16;
681 actions = sresize(actions, actionsize, struct action);
682 }
683 actions[nactions].mode = TEXT;
684 actions[nactions].arg = optval;
685 nactions++;
686 break;
687 case OPT_HTML:
688 if (nactions >= actionsize) {
689 actionsize = nactions * 3 / 2 + 16;
690 actions = sresize(actions, actionsize, struct action);
691 }
692 actions[nactions].mode = HTML;
693 actions[nactions].arg = optval;
694 nactions++;
695 break;
696 case OPT_HTTPD:
697 if (nactions >= actionsize) {
698 actionsize = nactions * 3 / 2 + 16;
699 actions = sresize(actions, actionsize, struct action);
700 }
701 actions[nactions].mode = HTTPD;
702 actions[nactions].arg = NULL;
703 nactions++;
704 break;
705 case OPT_REMOVE:
706 if (nactions >= actionsize) {
707 actionsize = nactions * 3 / 2 + 16;
708 actions = sresize(actions, actionsize, struct action);
709 }
710 actions[nactions].mode = REMOVE;
711 actions[nactions].arg = NULL;
712 nactions++;
713 break;
714 case OPT_PROGRESS:
715 progress = 2;
716 break;
717 case OPT_NOPROGRESS:
718 progress = 0;
719 break;
720 case OPT_TTYPROGRESS:
721 progress = 1;
722 break;
723 case OPT_CROSSFS:
724 crossfs = 1;
725 break;
726 case OPT_NOCROSSFS:
727 crossfs = 0;
728 break;
729 case OPT_DIRATIME:
730 fakediratimes = 0;
731 break;
732 case OPT_NODIRATIME:
733 fakediratimes = 1;
734 break;
735 case OPT_MTIME:
736 mtime = 1;
737 break;
738 case OPT_DATAFILE:
739 filename = optval;
740 break;
741 case OPT_TQDEPTH:
742 tqdepth = atoi(optval);
743 break;
744 case OPT_MINAGE:
745 textcutoff = parse_age(now, optval);
746 break;
747 case OPT_AGERANGE:
748 if (!strcmp(optval, "auto")) {
749 htmlautoagerange = 1;
750 } else {
751 char *q = optval + strcspn(optval, "-:");
752 if (*q)
753 *q++ = '\0';
754 htmloldest = parse_age(now, optval);
755 htmlnewest = *q ? parse_age(now, q) : now;
756 htmlautoagerange = 0;
757 }
758 break;
759 case OPT_SERVERADDR:
760 {
761 char *port;
762 if (optval[0] == '[' &&
763 (port = strchr(optval, ']')) != NULL)
764 port++;
765 else
766 port = optval;
767 port += strcspn(port, ":");
768 if (port)
769 *port++ = '\0';
770 httpserveraddr = optval;
771 httpserverport = atoi(port);
772 }
773 break;
774 case OPT_AUTH:
775 if (!strcmp(optval, "magic"))
776 auth = HTTPD_AUTH_MAGIC;
777 else if (!strcmp(optval, "basic"))
778 auth = HTTPD_AUTH_BASIC;
779 else if (!strcmp(optval, "none"))
780 auth = HTTPD_AUTH_NONE;
781 else if (!strcmp(optval, "default"))
782 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
783 else if (!strcmp(optval, "help") ||
784 !strcmp(optval, "list")) {
785 printf(PNAME ": supported HTTP authentication types"
786 " are:\n"
787 " magic use Linux /proc/net/tcp to"
788 " determine owner of peer socket\n"
789 " basic HTTP Basic username and"
790 " password authentication\n"
791 " default use 'magic' if possible, "
792 " otherwise fall back to 'basic'\n"
793 " none unauthenticated HTTP (if"
794 " the data file is non-confidential)\n");
795 return 0;
796 } else {
797 fprintf(stderr, "%s: unrecognised authentication"
798 " type '%s'\n%*s options are 'magic',"
799 " 'basic', 'none', 'default'\n",
800 PNAME, optval, (int)strlen(PNAME), "");
801 return 1;
802 }
803 break;
804 case OPT_AUTHFILE:
805 case OPT_AUTHFD:
806 {
807 int fd;
808 char namebuf[40];
809 const char *name;
810 char *authbuf;
811 int authlen, authsize;
812 int ret;
813
814 if (optid == OPT_AUTHFILE) {
815 fd = open(optval, O_RDONLY);
816 if (fd < 0) {
817 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
818 optval, strerror(errno));
819 return 1;
820 }
821 name = optval;
822 } else {
823 fd = atoi(optval);
824 name = namebuf;
825 sprintf(namebuf, "fd %d", fd);
826 }
827
828 authlen = 0;
829 authsize = 256;
830 authbuf = snewn(authsize, char);
831 while ((ret = read(fd, authbuf+authlen,
832 authsize-authlen)) > 0) {
833 authlen += ret;
834 if ((authsize - authlen) < (authsize / 16)) {
835 authsize = authlen * 3 / 2 + 4096;
836 authbuf = sresize(authbuf, authsize, char);
837 }
838 }
839 if (ret < 0) {
840 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
841 name, strerror(errno));
842 return 1;
843 }
844 if (optid == OPT_AUTHFILE)
845 close(fd);
846 httpauthdata = authbuf;
847 }
848 break;
849 case OPT_INCLUDE:
850 case OPT_INCLUDEPATH:
851 case OPT_EXCLUDE:
852 case OPT_EXCLUDEPATH:
853 case OPT_PRUNE:
854 case OPT_PRUNEPATH:
855 if (ninex >= inexsize) {
856 inexsize = ninex * 3 / 2 + 16;
857 inex = sresize(inex, inexsize,
858 struct inclusion_exclusion);
859 }
860 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
861 optid == OPT_EXCLUDEPATH ||
862 optid == OPT_PRUNEPATH);
863 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
864 optid == OPT_INCLUDEPATH ? 1 :
865 optid == OPT_EXCLUDE ? 0 :
866 optid == OPT_EXCLUDEPATH ? 0 :
867 optid == OPT_PRUNE ? -1 :
868 /* optid == OPT_PRUNEPATH ? */ -1);
869 inex[ninex].wildcard = optval;
870 ninex++;
871 break;
872 }
873 }
874 } else {
875 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
876 return 1;
877 }
878 }
879
880 if (nactions == 0) {
881 usage(stderr);
882 return 1;
883 }
884
885 for (action = 0; action < nactions; action++) {
886 int mode = actions[action].mode;
887
888 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
889 const char *scandir = actions[action].arg;
890
891 if (mode == LOAD) {
892 char *buf = fgetline(stdin);
893 unsigned newpathsep;
894 buf[strcspn(buf, "\r\n")] = '\0';
895 if (1 != sscanf(buf, DUMPHDR "%x",
896 &newpathsep)) {
897 fprintf(stderr, "%s: header in dump file not recognised\n",
898 PNAME);
899 return 1;
900 }
901 pathsep = (char)newpathsep;
902 sfree(buf);
903 }
904
905 if (mode == SCAN || mode == LOAD) {
906 /*
907 * Prepare to write out the index file.
908 */
909 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT,
910 S_IRUSR | S_IWUSR);
911 if (fd < 0) {
912 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
913 strerror(errno));
914 return 1;
915 }
916 if (fstat(fd, &st) < 0) {
917 perror(PNAME ": fstat");
918 return 1;
919 }
920 ctx->datafile_dev = st.st_dev;
921 ctx->datafile_ino = st.st_ino;
922 ctx->straight_to_dump = 0;
923 } else {
924 ctx->datafile_dev = -1;
925 ctx->datafile_ino = -1;
926 ctx->straight_to_dump = 1;
927 }
928
929 if (mode == SCAN || mode == SCANDUMP) {
930 if (stat(scandir, &st) < 0) {
931 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
932 strerror(errno));
933 return 1;
934 }
935 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
936 }
937
938 ctx->inex = inex;
939 ctx->ninex = ninex;
940 ctx->crossfs = crossfs;
941 ctx->fakeatimes = fakediratimes;
942 ctx->usemtime = mtime;
943
944 ctx->last_output_update = time(NULL);
945
946 /* progress==1 means report progress only if stderr is a tty */
947 if (progress == 1)
948 progress = isatty(2) ? 2 : 0;
949 ctx->progress = progress;
950 {
951 struct winsize ws;
952 if (progress &&
953 ioctl(2, TIOCGWINSZ, &ws) == 0 &&
954 ws.ws_col > 0)
955 ctx->progwidth = ws.ws_col - 1;
956 else
957 ctx->progwidth = 79;
958 }
959
960 if (mode == SCANDUMP)
961 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
962
963 /*
964 * Scan the directory tree, and write out the trie component
965 * of the data file.
966 */
967 if (mode != SCANDUMP) {
968 ctx->tb = triebuild_new(fd);
969 }
970 if (mode == LOAD) {
971 char *buf;
972 int line = 2;
973 while ((buf = fgetline(stdin)) != NULL) {
974 struct trie_file tf;
975 char *p, *q;
976
977 buf[strcspn(buf, "\r\n")] = '\0';
978
979 p = buf;
980 q = p;
981 while (*p && *p != ' ') p++;
982 if (!*p) {
983 fprintf(stderr, "%s: dump file line %d: expected at least"
984 " three fields\n", PNAME, line);
985 return 1;
986 }
987 *p++ = '\0';
988 tf.size = strtoull(q, NULL, 10);
989 q = p;
990 while (*p && *p != ' ') p++;
991 if (!*p) {
992 fprintf(stderr, "%s: dump file line %d: expected at least"
993 " three fields\n", PNAME, line);
994 return 1;
995 }
996 *p++ = '\0';
997 tf.atime = strtoull(q, NULL, 10);
998 q = buf;
999 while (*p) {
1000 int c = *p;
1001 if (*p == '%') {
1002 int i;
1003 p++;
1004 c = 0;
1005 for (i = 0; i < 2; i++) {
1006 c *= 16;
1007 if (*p >= '0' && *p <= '9')
1008 c += *p - '0';
1009 else if (*p >= 'A' && *p <= 'F')
1010 c += *p - ('A' - 10);
1011 else if (*p >= 'a' && *p <= 'f')
1012 c += *p - ('a' - 10);
1013 else {
1014 fprintf(stderr, "%s: dump file line %d: unable"
1015 " to parse hex escape\n", PNAME, line);
1016 }
1017 p++;
1018 }
1019 }
1020 *q++ = c;
1021 p++;
1022 }
1023 *q = '\0';
1024 triebuild_add(ctx->tb, buf, &tf);
1025 sfree(buf);
1026 line++;
1027 }
1028 } else {
1029 du(scandir, gotdata, scan_error, ctx);
1030 }
1031 if (mode != SCANDUMP) {
1032 size_t maxpathlen;
1033 size_t delta;
1034 char *buf, *prevbuf;
1035
1036 count = triebuild_finish(ctx->tb);
1037 triebuild_free(ctx->tb);
1038
1039 if (ctx->progress) {
1040 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
1041 fflush(stderr);
1042 }
1043
1044 /*
1045 * Work out how much space the cumulative index trees
1046 * will take; enlarge the file, and memory-map it.
1047 */
1048 if (fstat(fd, &st) < 0) {
1049 perror(PNAME ": fstat");
1050 return 1;
1051 }
1052
1053 printf("Built pathname index, %d entries,"
1054 " %llu bytes of index\n", count,
1055 (unsigned long long)st.st_size);
1056
1057 totalsize = index_initial_size(st.st_size, count);
1058 totalsize += totalsize / 10;
1059
1060 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
1061 perror(PNAME ": lseek");
1062 return 1;
1063 }
1064 if (write(fd, "\0", 1) < 1) {
1065 perror(PNAME ": write");
1066 return 1;
1067 }
1068
1069 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1070 if (!mappedfile) {
1071 perror(PNAME ": mmap");
1072 return 1;
1073 }
1074
1075 if (fakediratimes) {
1076 printf("Faking directory atimes\n");
1077 trie_fake_dir_atimes(mappedfile);
1078 }
1079
1080 printf("Building index\n");
1081 ib = indexbuild_new(mappedfile, st.st_size, count, &delta);
1082 maxpathlen = trie_maxpathlen(mappedfile);
1083 buf = snewn(maxpathlen, char);
1084 prevbuf = snewn(maxpathlen, char);
1085 tw = triewalk_new(mappedfile);
1086 prevbuf[0] = '\0';
1087 tf = triewalk_next(tw, buf);
1088 assert(tf);
1089 while (1) {
1090 int i;
1091
1092 if (totalsize - indexbuild_realsize(ib) < delta) {
1093 const void *oldfile = mappedfile;
1094 ptrdiff_t diff;
1095
1096 /*
1097 * Unmap the file, grow it, and remap it.
1098 */
1099 munmap(mappedfile, totalsize);
1100
1101 totalsize += delta;
1102 totalsize += totalsize / 10;
1103
1104 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
1105 perror(PNAME ": lseek");
1106 return 1;
1107 }
1108 if (write(fd, "\0", 1) < 1) {
1109 perror(PNAME ": write");
1110 return 1;
1111 }
1112
1113 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1114 if (!mappedfile) {
1115 perror(PNAME ": mmap");
1116 return 1;
1117 }
1118
1119 indexbuild_rebase(ib, mappedfile);
1120 triewalk_rebase(tw, mappedfile);
1121 diff = (const unsigned char *)mappedfile -
1122 (const unsigned char *)oldfile;
1123 if (prevtf)
1124 prevtf = (const struct trie_file *)
1125 (((const unsigned char *)prevtf) + diff);
1126 if (tf)
1127 tf = (const struct trie_file *)
1128 (((const unsigned char *)tf) + diff);
1129 }
1130
1131 /*
1132 * Get the next file from the index. So we are
1133 * currently holding, and have not yet
1134 * indexed, prevtf (with pathname prevbuf) and
1135 * tf (with pathname buf).
1136 */
1137 prevtf = tf;
1138 memcpy(prevbuf, buf, maxpathlen);
1139 tf = triewalk_next(tw, buf);
1140
1141 if (!tf)
1142 buf[0] = '\0';
1143
1144 /*
1145 * Find the first differing character position
1146 * between our two pathnames.
1147 */
1148 for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++);
1149
1150 /*
1151 * If prevbuf was a directory name and buf is
1152 * something inside that directory, then
1153 * trie_before() will be called on prevbuf
1154 * itself. Hence we must drop a tag before it,
1155 * so that the resulting index is usable.
1156 */
1157 if ((!prevbuf[i] && (buf[i] == pathsep ||
1158 (i > 0 && buf[i-1] == pathsep))))
1159 indexbuild_tag(ib);
1160
1161 /*
1162 * Add prevtf to the index.
1163 */
1164 indexbuild_add(ib, prevtf);
1165
1166 if (!tf) {
1167 /*
1168 * Drop an unconditional final tag, and
1169 * get out of this loop.
1170 */
1171 indexbuild_tag(ib);
1172 break;
1173 }
1174
1175 /*
1176 * If prevbuf was a filename inside some
1177 * directory which buf is outside, then
1178 * trie_before() will be called on some
1179 * pathname either equal to buf or epsilon
1180 * less than it. Either way, we're going to
1181 * need to drop a tag after prevtf.
1182 */
1183 if (strchr(prevbuf+i, pathsep) || !tf)
1184 indexbuild_tag(ib);
1185 }
1186
1187 triewalk_free(tw);
1188 realsize = indexbuild_realsize(ib);
1189 indexbuild_free(ib);
1190
1191 munmap(mappedfile, totalsize);
1192 ftruncate(fd, realsize);
1193 close(fd);
1194 printf("Final index file size = %llu bytes\n",
1195 (unsigned long long)realsize);
1196 }
1197 } else if (mode == TEXT) {
1198 char *querydir = actions[action].arg;
1199 size_t pathlen;
1200
1201 fd = open(filename, O_RDONLY);
1202 if (fd < 0) {
1203 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1204 strerror(errno));
1205 return 1;
1206 }
1207 if (fstat(fd, &st) < 0) {
1208 perror(PNAME ": fstat");
1209 return 1;
1210 }
1211 totalsize = st.st_size;
1212 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1213 if (!mappedfile) {
1214 perror(PNAME ": mmap");
1215 return 1;
1216 }
1217 pathsep = trie_pathsep(mappedfile);
1218
1219 /*
1220 * Trim trailing slash, just in case.
1221 */
1222 pathlen = strlen(querydir);
1223 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1224 querydir[--pathlen] = '\0';
1225
1226 text_query(mappedfile, querydir, textcutoff, tqdepth);
1227
1228 munmap(mappedfile, totalsize);
1229 } else if (mode == HTML) {
1230 char *querydir = actions[action].arg;
1231 size_t pathlen;
1232 struct html_config cfg;
1233 unsigned long xi;
1234 char *html;
1235
1236 fd = open(filename, O_RDONLY);
1237 if (fd < 0) {
1238 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1239 strerror(errno));
1240 return 1;
1241 }
1242 if (fstat(fd, &st) < 0) {
1243 perror(PNAME ": fstat");
1244 return 1;
1245 }
1246 totalsize = st.st_size;
1247 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1248 if (!mappedfile) {
1249 perror(PNAME ": mmap");
1250 return 1;
1251 }
1252 pathsep = trie_pathsep(mappedfile);
1253
1254 /*
1255 * Trim trailing slash, just in case.
1256 */
1257 pathlen = strlen(querydir);
1258 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1259 querydir[--pathlen] = '\0';
1260
1261 xi = trie_before(mappedfile, querydir);
1262 cfg.format = NULL;
1263 cfg.autoage = htmlautoagerange;
1264 cfg.oldest = htmloldest;
1265 cfg.newest = htmlnewest;
1266 html = html_query(mappedfile, xi, &cfg);
1267 fputs(html, stdout);
1268
1269 munmap(mappedfile, totalsize);
1270 } else if (mode == DUMP) {
1271 size_t maxpathlen;
1272 char *buf;
1273
1274 fd = open(filename, O_RDONLY);
1275 if (fd < 0) {
1276 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1277 strerror(errno));
1278 return 1;
1279 }
1280 if (fstat(fd, &st) < 0) {
1281 perror(PNAME ": fstat");
1282 return 1;
1283 }
1284 totalsize = st.st_size;
1285 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1286 if (!mappedfile) {
1287 perror(PNAME ": mmap");
1288 return 1;
1289 }
1290 pathsep = trie_pathsep(mappedfile);
1291
1292 maxpathlen = trie_maxpathlen(mappedfile);
1293 buf = snewn(maxpathlen, char);
1294
1295 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
1296 tw = triewalk_new(mappedfile);
1297 while ((tf = triewalk_next(tw, buf)) != NULL)
1298 dump_line(buf, tf);
1299 triewalk_free(tw);
1300
1301 munmap(mappedfile, totalsize);
1302 } else if (mode == HTTPD) {
1303 struct html_config pcfg;
1304 struct httpd_config dcfg;
1305
1306 fd = open(filename, O_RDONLY);
1307 if (fd < 0) {
1308 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1309 strerror(errno));
1310 return 1;
1311 }
1312 if (fstat(fd, &st) < 0) {
1313 perror(PNAME ": fstat");
1314 return 1;
1315 }
1316 totalsize = st.st_size;
1317 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1318 if (!mappedfile) {
1319 perror(PNAME ": mmap");
1320 return 1;
1321 }
1322 pathsep = trie_pathsep(mappedfile);
1323
1324 dcfg.address = httpserveraddr;
1325 dcfg.port = httpserverport;
1326 dcfg.basicauthdata = httpauthdata;
1327 pcfg.format = NULL;
1328 pcfg.autoage = htmlautoagerange;
1329 pcfg.oldest = htmloldest;
1330 pcfg.newest = htmlnewest;
1331 run_httpd(mappedfile, auth, &dcfg, &pcfg);
1332 munmap(mappedfile, totalsize);
1333 } else if (mode == REMOVE) {
1334 if (remove(filename) < 0) {
1335 fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename,
1336 strerror(errno));
1337 return 1;
1338 }
1339 }
1340 }
1341
1342 return 0;
1343 }