Reposition r8253's change to avoid redundant index lookups.
[sgt/agedu] / agedu.c
Commit | Line | Data
70322ae3 1/*
2 * Main program for agedu.
3 */
4
5#define _GNU_SOURCE
6#include <stdio.h>
7#include <errno.h>
8#include <stdarg.h>
9#include <stdlib.h>
10#include <stdint.h>
11#include <string.h>
12#include <time.h>
e9e7a1bf 13#include <assert.h>
70322ae3 14
15#include <unistd.h>
16#include <sys/types.h>
17#include <fcntl.h>
18#include <sys/mman.h>
8b1f55d6 19#include <termios.h>
20#include <sys/ioctl.h>
9d0b9596 21#include <fnmatch.h>
70322ae3 22
353bc75d 23#include "agedu.h"
70322ae3 24#include "du.h"
25#include "trie.h"
26#include "index.h"
27#include "malloc.h"
28#include "html.h"
29#include "httpd.h"
84849cbd 30#include "fgetline.h"
70322ae3 31
/*
 * Path separator. This global variable affects the behaviour of
 * various parts of the code when they need to deal with path
 * separators. The path separator appropriate to a particular data
 * set is encoded in the index file storing that data set; data
 * sets generated on Unix will of course have the default '/', but
 * foreign data sets are conceivable and must be handled correctly.
 *
 * main() overwrites this with the separator recorded in a dump
 * header or in an existing index file before querying that data.
 */
char pathsep = '/';
41
70322ae3 42void fatal(const char *fmt, ...)
43{
44 va_list ap;
45 fprintf(stderr, "%s: ", PNAME);
46 va_start(ap, fmt);
47 vfprintf(stderr, fmt, ap);
48 va_end(ap);
49 fprintf(stderr, "\n");
50 exit(1);
51}
52
/*
 * One wildcard filtering rule from the command line (--include,
 * --exclude, --prune and their "_path" variants). Rules are applied
 * in order during a scan and the last one that matches wins.
 */
struct inclusion_exclusion {
    int type;               /* 1 = include, 0 = exclude, -1 = prune */
    const char *wildcard;   /* pattern handed to fnmatch() */
    int path;               /* nonzero: match the whole pathname;
                             * zero: match only the last component */
};
58
70322ae3 59struct ctx {
60 triebuild *tb;
61 dev_t datafile_dev, filesystem_dev;
62 ino_t datafile_ino;
63 time_t last_output_update;
8b1f55d6 64 int progress, progwidth;
84849cbd 65 int straight_to_dump;
9d0b9596 66 struct inclusion_exclusion *inex;
67 int ninex;
68 int crossfs;
70322ae3 69};
70
84849cbd 71static void dump_line(const char *pathname, const struct trie_file *tf)
72{
73 const char *p;
74 printf("%llu %llu ", tf->size, tf->atime);
75 for (p = pathname; *p; p++) {
76 if (*p >= ' ' && *p < 127 && *p != '%')
77 putchar(*p);
78 else
79 printf("%%%02x", (unsigned char)*p);
80 }
81 putchar('\n');
82}
83
70322ae3 84static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
85{
86 struct ctx *ctx = (struct ctx *)vctx;
87 struct trie_file file;
88 time_t t;
9d0b9596 89 int i, include;
90 const char *filename;
70322ae3 91
92 /*
93 * Filter out our own data file.
94 */
95 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
96 return 0;
97
98 /*
99 * Don't cross the streams^W^Wany file system boundary.
70322ae3 100 */
9d0b9596 101 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
70322ae3 102 return 0;
103
84849cbd 104 file.size = (unsigned long long)512 * st->st_blocks;
0ba55302 105 file.atime = st->st_atime;
106
70322ae3 107 /*
9d0b9596 108 * Filter based on wildcards.
70322ae3 109 */
9d0b9596 110 include = 1;
373a02e5 111 filename = strrchr(pathname, pathsep);
9d0b9596 112 if (!filename)
113 filename = pathname;
114 else
115 filename++;
116 for (i = 0; i < ctx->ninex; i++) {
117 if (fnmatch(ctx->inex[i].wildcard,
0ba55302 118 ctx->inex[i].path ? pathname : filename, 0) == 0)
119 include = ctx->inex[i].type;
120 }
121 if (include == -1)
122 return 0; /* ignore this entry and any subdirs */
123 if (include == 0) {
124 /*
125 * Here we are supposed to be filtering an entry out, but
126 * still recursing into it if it's a directory. However,
127 * we can't actually leave out any directory whose
128 * subdirectories we then look at. So we cheat, in that
129 * case, by setting the size to zero.
130 */
131 if (!S_ISDIR(st->st_mode))
132 return 0; /* just ignore */
133 else
84849cbd 134 file.size = 0;
9d0b9596 135 }
70322ae3 136
84849cbd 137 if (ctx->straight_to_dump)
138 dump_line(pathname, &file);
139 else
140 triebuild_add(ctx->tb, pathname, &file);
70322ae3 141
84849cbd 142 if (ctx->progress) {
143 t = time(NULL);
144 if (t != ctx->last_output_update) {
8b1f55d6 145 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
146 pathname);
147 fflush(stderr);
84849cbd 148 ctx->last_output_update = t;
8b1f55d6 149 }
70322ae3 150 }
151
152 return 1;
153}
154
e9e7a1bf 155static void text_query(const void *mappedfile, const char *querydir,
7cf11b75 156 time_t t, int depth)
70322ae3 157{
158 size_t maxpathlen;
159 char *pathbuf;
160 unsigned long xi1, xi2;
161 unsigned long long s1, s2;
162
163 maxpathlen = trie_maxpathlen(mappedfile);
164 pathbuf = snewn(maxpathlen + 1, char);
165
166 /*
167 * We want to query everything between the supplied filename
168 * (inclusive) and that filename with a ^A on the end
169 * (exclusive). So find the x indices for each.
170 */
256c29a2 171 strcpy(pathbuf, querydir);
172 make_successor(pathbuf);
e9e7a1bf 173 xi1 = trie_before(mappedfile, querydir);
70322ae3 174 xi2 = trie_before(mappedfile, pathbuf);
175
0313b788 176 if (xi2 - xi1 == 1)
177 return; /* file, or empty dir => no display */
178
70322ae3 179 /*
180 * Now do the lookups in the age index.
181 */
182 s1 = index_query(mappedfile, xi1, t);
183 s2 = index_query(mappedfile, xi2, t);
184
010dd2a2 185 if (s1 == s2)
186 return; /* no space taken up => no display */
187
70322ae3 188 if (depth > 0) {
189 /*
190 * Now scan for first-level subdirectories and report
191 * those too.
192 */
193 xi1++;
194 while (xi1 < xi2) {
195 trie_getpath(mappedfile, xi1, pathbuf);
7cf11b75 196 text_query(mappedfile, pathbuf, t, depth-1);
256c29a2 197 make_successor(pathbuf);
70322ae3 198 xi1 = trie_before(mappedfile, pathbuf);
199 }
200 }
16e591d6 201
202 /* Display in units of 1Kb */
203 printf("%-11llu %s\n", (s2 - s1) / 1024, querydir);
70322ae3 204}
205
/*
 * Largely frivolous way to define all my command-line options. I
 * present here a parametric macro which declares a series of
 * _logical_ option identifiers, and for each one declares zero or
 * more short option characters and zero or more long option
 * words. Then I repeatedly invoke that macro with its arguments
 * defined to be various other macros, which allows me to
 * variously:
 *
 *  - define an enum allocating a distinct integer value to each
 *    logical option id
 *  - define a string consisting of precisely all the short option
 *    characters
 *  - define a string array consisting of all the long option
 *    strings
 *  - define (with help from auxiliary enums) integer arrays
 *    parallel to both of the above giving the logical option id
 *    for each physical short and long option
 *  - define an array indexed by logical option id indicating
 *    whether the option in question takes a value
 *  - define a function which prints out brief online help for all
 *    the options.
 *
 * It's not at all clear to me that this trickery is actually
 * particularly _efficient_ - it still, after all, requires going
 * linearly through the option list at run time and doing a
 * strcmp, whereas in an ideal world I'd have liked the lists of
 * long and short options to be pre-sorted so that a binary search
 * or some other more efficient lookup was possible. (Not that
 * asymptotic algorithmic complexity is remotely vital in option
 * parsing, but if I were doing this in, say, Lisp or something
 * with an equivalently powerful preprocessor then once I'd had
 * the idea of preparing the option-parsing data structures at
 * compile time I would probably have made the effort to prepare
 * them _properly_. I could have Perl generate me a source file
 * from some sort of description, I suppose, but that would seem
 * like overkill. And in any case, it's more of a challenge to
 * achieve as much as possible by cunning use of cpp and enum than
 * to just write some sensible and logical code in a Turing-
 * complete language. I said it was largely frivolous :-)
 *
 * This approach does have the virtue that it brings together the
 * option ids, option spellings and help text into a single
 * combined list and defines them all in exactly one place. If I
 * want to add a new option, or a new spelling for an option, I
 * only have to modify the main OPTHELP macro below and then add
 * code to process the new logical id.
 *
 * (Though, really, even that isn't ideal, since it still involves
 * modifying the source file in more than one place. In a
 * _properly_ ideal world, I'd be able to interleave the option
 * definitions with the code fragments that process them. And then
 * not bother defining logical identifiers for them at all - those
 * would be automatically generated, since I wouldn't have any
 * need to specify them manually in another part of the code.)
 */

#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file on stdout") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    NOVAL(LOAD) SHORT(l) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[all modes] specify index file") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(TQDEPTH) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") HELPOPT("[--text] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") HELPOPT("[--text] include only files older than this") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") HELPOPT("[--html,--web] set limits of colour coding") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text")

/*
 * Building blocks that get plugged into OPTHELP/OPTIONS. Each one
 * turns a single list element into a fragment of an enum body or of
 * an array initialiser.
 */
#define IGNORE(x)
#define DEFENUM(x) OPT_ ## x,
#define ZERO(x) 0,
#define ONE(x) 1,
#define STRING(x) #x ,
#define STRINGNOCOMMA(x) #x
/*
 * The *NEWOPT/*THISOPT pairs work together inside one enum: NEWOPT
 * resets the running enum value to the logical option id, and
 * THISOPT burns one enumerator and then defines *VAL_<spelling> as
 * "that minus one", i.e. the logical id again, which leaves the
 * running value ready for a further spelling of the same option.
 */
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
#define SHORTTMP(x) SHORTtmp3_ ## x,
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
/* Long options get their own prefix, so that a long option spelled
 * the same as a short option letter cannot clash with SHORTTMP. */
#define LONGTMP(x) LONGtmp3_ ## x,

/* OPTHELP with all the help-text elements discarded. */
#define OPTIONS(NOVAL, VAL, SHORT, LONG) \
    OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)

/* Logical option ids OPT_*, and the number of them. */
enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
/* Numbers of physical short and long option spellings. */
enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
/* Indexed by logical id: 1 if the option takes a value. */
static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
/* All short option characters, and all long option words. */
static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
/* Auxiliary enums defining SHORTVAL_* and LONGVAL_*. */
enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
/* Parallel to shortopts[] / longopts[]: the logical id of each. */
static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
355
e9e7a1bf 356static void usage(FILE *fp)
357{
358 char longbuf[80];
359 const char *prefix, *shortopt, *longopt, *optarg;
360 int i, optex;
361
362#define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
363#define HELPNOVAL(s) optex = 0;
364#define HELPVAL(s) optex = 1;
365#define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
366#define HELPLONG(s) if (!longopt) { \
367 strcpy(longbuf, "--" #s); longopt = longbuf; \
368 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
369#define HELPPFX(s) prefix = s;
370#define HELPARG(s) optarg = s;
371#define HELPLINE(s) assert(optex == -1); \
372 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
373 HELPRESET;
374#define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
375 assert(shortopt || longopt); \
376 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
377 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
378 optarg?" ":"", optarg?optarg:""); \
379 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
380
381 HELPRESET;
382 OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
383 HELPPFX, HELPARG, HELPLINE, HELPOPT);
384
385#undef HELPRESET
386#undef HELPNOVAL
387#undef HELPVAL
388#undef HELPSHORT
389#undef HELPLONG
390#undef HELPPFX
391#undef HELPARG
392#undef HELPLINE
393#undef HELPOPT
394}
395
/*
 * Convert an age specification into an absolute time.
 *
 *   now     the reference time
 *   agestr  a decimal number followed by one of d, w, m, y (days,
 *           weeks, months, years), in either upper or lower case
 *
 * Returns the time that lies that far before 'now'. Days and weeks
 * are fixed numbers of seconds; months and years are calendar
 * months, counted back in local time.
 *
 * If agestr cannot be parsed, prints an error on stderr and exits
 * with status 1.
 */
static time_t parse_age(time_t now, const char *agestr)
{
    time_t t;
    struct tm tm;
    int nunits;
    char unit[2];

    t = now;

    if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
        fprintf(stderr, "%s: age specification should be a number followed by"
                " one of d,w,m,y\n", PNAME);
        exit(1);
    }

    /*
     * The scan set above accepts both cases, so both must be handled
     * here; otherwise "5D" would fall through and be read as 5 years.
     */
    switch (unit[0]) {
      case 'd':
      case 'D':
        /* Multiply in time_t, not int, so large counts don't overflow. */
        t -= (time_t)86400 * nunits;
        break;
      case 'w':
      case 'W':
        t -= (time_t)86400 * 7 * nunits;
        break;
      default:
        {
            int ym;

            tm = *localtime(&t);
            ym = tm.tm_year * 12 + tm.tm_mon;

            if (unit[0] == 'm' || unit[0] == 'M')
                ym -= nunits;
            else
                ym -= 12 * nunits;

            tm.tm_year = ym / 12;
            tm.tm_mon = ym % 12;
            /* The target date may lie on the other side of a DST
             * change from 'now'; let mktime work that out itself. */
            tm.tm_isdst = -1;

            t = mktime(&tm);
        }
        break;
    }

    return t;
}
434
70322ae3 435int main(int argc, char **argv)
436{
437 int fd, count;
438 struct ctx actx, *ctx = &actx;
439 struct stat st;
440 off_t totalsize, realsize;
441 void *mappedfile;
442 triewalk *tw;
443 indexbuild *ib;
444 const struct trie_file *tf;
bf53e756 445 char *filename = PNAME ".dat";
70322ae3 446 int doing_opts = 1;
444c684c 447 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD };
448 struct action {
449 int mode;
450 char *arg;
451 } *actions = NULL;
452 int nactions = 0, actionsize = 0, action;
f2e52893 453 time_t now = time(NULL);
454 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
455 int htmlautoagerange = 1;
1e8d78b9 456 const char *httpserveraddr = NULL;
457 int httpserverport = 0;
458 const char *httpauthdata = NULL;
812e4bf2 459 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
8b1f55d6 460 int progress = 1;
9d0b9596 461 struct inclusion_exclusion *inex = NULL;
462 int ninex = 0, inexsize = 0;
463 int crossfs = 0;
16e591d6 464 int tqdepth = 1;
70322ae3 465
56fa1896 466#ifdef DEBUG_MAD_OPTION_PARSING_MACROS
467 {
468 static const char *const optnames[NOPTIONS] = {
469 OPTIONS(STRING,STRING,IGNORE,IGNORE)
470 };
471 int i;
472 for (i = 0; i < NSHORTOPTS; i++)
473 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
474 opthasval[shortvals[i]] ? "value" : "no value");
475 for (i = 0; i < NLONGOPTS; i++)
476 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
477 opthasval[longvals[i]] ? "value" : "no value");
478 }
479#endif
480
70322ae3 481 while (--argc > 0) {
482 char *p = *++argv;
70322ae3 483
484 if (doing_opts && *p == '-') {
56fa1896 485 int wordstart = 1;
486
70322ae3 487 if (!strcmp(p, "--")) {
488 doing_opts = 0;
56fa1896 489 continue;
490 }
491
492 p++;
493 while (*p) {
494 int optid = -1;
495 int i;
496 char *optval;
497
498 if (wordstart && *p == '-') {
70322ae3 499 /*
56fa1896 500 * GNU-style long option.
70322ae3 501 */
56fa1896 502 p++;
503 optval = strchr(p, '=');
504 if (optval)
505 *optval++ = '\0';
506
507 for (i = 0; i < NLONGOPTS; i++) {
508 const char *opt = longopts[i], *s = p;
509 int match = 1;
510 /*
511 * The underscores in the option names
512 * defined above may be given by the user
513 * as underscores or dashes, or omitted
514 * entirely.
515 */
516 while (*opt) {
517 if (*opt == '_') {
518 if (*s == '-' || *s == '_')
519 s++;
520 } else {
521 if (*opt != *s) {
522 match = 0;
523 break;
524 }
525 s++;
526 }
527 opt++;
528 }
529 if (match && !*s) {
530 optid = longvals[i];
531 break;
70322ae3 532 }
533 }
56fa1896 534
535 if (optid < 0) {
536 fprintf(stderr, "%s: unrecognised option '--%s'\n",
537 PNAME, p);
538 return 1;
539 }
540
541 if (!opthasval[optid]) {
542 if (optval) {
543 fprintf(stderr, "%s: unexpected argument to option"
544 " '--%s'\n", PNAME, p);
812e4bf2 545 return 1;
546 }
56fa1896 547 } else {
548 if (!optval) {
549 if (--argc > 0) {
550 optval = *++argv;
551 } else {
552 fprintf(stderr, "%s: option '--%s' expects"
553 " an argument\n", PNAME, p);
554 return 1;
555 }
9d0b9596 556 }
70322ae3 557 }
56fa1896 558
559 p += strlen(p); /* finished with this argument word */
70322ae3 560 } else {
56fa1896 561 /*
562 * Short option.
563 */
70322ae3 564 char c = *p++;
565
56fa1896 566 for (i = 0; i < NSHORTOPTS; i++)
567 if (c == shortopts[i]) {
568 optid = shortvals[i];
569 break;
570 }
571
572 if (optid < 0) {
573 fprintf(stderr, "%s: unrecognised option '-%c'\n",
574 PNAME, c);
575 return 1;
576 }
577
578 if (opthasval[optid]) {
70322ae3 579 if (*p) {
580 optval = p;
581 p += strlen(p);
582 } else if (--argc > 0) {
583 optval = *++argv;
584 } else {
56fa1896 585 fprintf(stderr, "%s: option '-%c' expects"
70322ae3 586 " an argument\n", PNAME, c);
587 return 1;
588 }
56fa1896 589 } else {
590 optval = NULL;
591 }
592 }
593
594 wordstart = 0;
595
596 /*
597 * Now actually process the option.
598 */
599 switch (optid) {
600 case OPT_HELP:
e9e7a1bf 601 usage(stdout);
56fa1896 602 return 0;
603 case OPT_VERSION:
604 printf("FIXME: version();\n");
605 return 0;
606 case OPT_LICENCE:
5a29503d 607 {
608 extern const char *const licence[];
609 int i;
610
611 for (i = 0; licence[i]; i++)
612 fputs(licence[i], stdout);
613
614 return 0;
615 }
56fa1896 616 return 0;
617 case OPT_SCAN:
444c684c 618 if (nactions >= actionsize) {
619 actionsize = nactions * 3 / 2 + 16;
620 actions = sresize(actions, actionsize, struct action);
621 }
622 actions[nactions].mode = SCAN;
623 actions[nactions].arg = optval;
624 nactions++;
56fa1896 625 break;
84849cbd 626 case OPT_SCANDUMP:
444c684c 627 if (nactions >= actionsize) {
628 actionsize = nactions * 3 / 2 + 16;
629 actions = sresize(actions, actionsize, struct action);
630 }
631 actions[nactions].mode = SCANDUMP;
632 actions[nactions].arg = optval;
633 nactions++;
84849cbd 634 break;
56fa1896 635 case OPT_DUMP:
444c684c 636 if (nactions >= actionsize) {
637 actionsize = nactions * 3 / 2 + 16;
638 actions = sresize(actions, actionsize, struct action);
639 }
640 actions[nactions].mode = DUMP;
641 actions[nactions].arg = NULL;
642 nactions++;
56fa1896 643 break;
84849cbd 644 case OPT_LOAD:
444c684c 645 if (nactions >= actionsize) {
646 actionsize = nactions * 3 / 2 + 16;
647 actions = sresize(actions, actionsize, struct action);
648 }
649 actions[nactions].mode = LOAD;
650 actions[nactions].arg = NULL;
651 nactions++;
84849cbd 652 break;
56fa1896 653 case OPT_TEXT:
444c684c 654 if (nactions >= actionsize) {
655 actionsize = nactions * 3 / 2 + 16;
656 actions = sresize(actions, actionsize, struct action);
657 }
658 actions[nactions].mode = TEXT;
659 actions[nactions].arg = optval;
660 nactions++;
56fa1896 661 break;
662 case OPT_HTML:
444c684c 663 if (nactions >= actionsize) {
664 actionsize = nactions * 3 / 2 + 16;
665 actions = sresize(actions, actionsize, struct action);
666 }
667 actions[nactions].mode = HTML;
668 actions[nactions].arg = optval;
669 nactions++;
56fa1896 670 break;
671 case OPT_HTTPD:
444c684c 672 if (nactions >= actionsize) {
673 actionsize = nactions * 3 / 2 + 16;
674 actions = sresize(actions, actionsize, struct action);
675 }
676 actions[nactions].mode = HTTPD;
677 actions[nactions].arg = NULL;
678 nactions++;
56fa1896 679 break;
680 case OPT_PROGRESS:
681 progress = 2;
682 break;
683 case OPT_NOPROGRESS:
684 progress = 0;
685 break;
686 case OPT_TTYPROGRESS:
687 progress = 1;
688 break;
689 case OPT_CROSSFS:
690 crossfs = 1;
691 break;
692 case OPT_NOCROSSFS:
693 crossfs = 0;
694 break;
695 case OPT_DATAFILE:
696 filename = optval;
697 break;
16e591d6 698 case OPT_TQDEPTH:
699 tqdepth = atoi(optval);
700 break;
56fa1896 701 case OPT_MINAGE:
f2e52893 702 textcutoff = parse_age(now, optval);
703 break;
704 case OPT_AGERANGE:
705 if (!strcmp(optval, "auto")) {
706 htmlautoagerange = 1;
707 } else {
708 char *q = optval + strcspn(optval, "-:");
709 if (*q)
710 *q++ = '\0';
711 htmloldest = parse_age(now, optval);
712 htmlnewest = *q ? parse_age(now, q) : now;
713 htmlautoagerange = 0;
714 }
56fa1896 715 break;
1e8d78b9 716 case OPT_SERVERADDR:
717 {
718 char *port;
719 if (optval[0] == '[' &&
720 (port = strchr(optval, ']')) != NULL)
721 port++;
722 else
723 port = optval;
724 port += strcspn(port, ":");
725 if (port)
726 *port++ = '\0';
727 httpserveraddr = optval;
728 httpserverport = atoi(port);
729 }
730 break;
56fa1896 731 case OPT_AUTH:
732 if (!strcmp(optval, "magic"))
733 auth = HTTPD_AUTH_MAGIC;
734 else if (!strcmp(optval, "basic"))
735 auth = HTTPD_AUTH_BASIC;
736 else if (!strcmp(optval, "none"))
737 auth = HTTPD_AUTH_NONE;
738 else if (!strcmp(optval, "default"))
739 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
f2e52893 740 else if (!strcmp(optval, "help") ||
741 !strcmp(optval, "list")) {
bf53e756 742 printf(PNAME ": supported HTTP authentication types"
f2e52893 743 " are:\n"
744 " magic use Linux /proc/net/tcp to"
745 " determine owner of peer socket\n"
746 " basic HTTP Basic username and"
747 " password authentication\n"
748 " default use 'magic' if possible, "
749 " otherwise fall back to 'basic'\n"
750 " none unauthenticated HTTP (if"
751 " the data file is non-confidential)\n");
752 return 0;
753 } else {
56fa1896 754 fprintf(stderr, "%s: unrecognised authentication"
755 " type '%s'\n%*s options are 'magic',"
756 " 'basic', 'none', 'default'\n",
757 PNAME, optval, (int)strlen(PNAME), "");
758 return 1;
759 }
760 break;
1e8d78b9 761 case OPT_AUTHFILE:
762 case OPT_AUTHFD:
763 {
764 int fd;
765 char namebuf[40];
766 const char *name;
767 char *authbuf;
768 int authlen, authsize;
769 int ret;
770
771 if (optid == OPT_AUTHFILE) {
772 fd = open(optval, O_RDONLY);
773 if (fd < 0) {
774 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
775 optval, strerror(errno));
776 return 1;
777 }
778 name = optval;
779 } else {
780 fd = atoi(optval);
781 name = namebuf;
782 sprintf(namebuf, "fd %d", fd);
783 }
784
785 authlen = 0;
786 authsize = 256;
787 authbuf = snewn(authsize, char);
788 while ((ret = read(fd, authbuf+authlen,
789 authsize-authlen)) > 0) {
790 authlen += ret;
791 if ((authsize - authlen) < (authsize / 16)) {
792 authsize = authlen * 3 / 2 + 4096;
793 authbuf = sresize(authbuf, authsize, char);
794 }
795 }
796 if (ret < 0) {
797 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
798 name, strerror(errno));
799 return 1;
800 }
801 if (optid == OPT_AUTHFILE)
802 close(fd);
803 httpauthdata = authbuf;
804 }
805 break;
56fa1896 806 case OPT_INCLUDE:
807 case OPT_INCLUDEPATH:
808 case OPT_EXCLUDE:
809 case OPT_EXCLUDEPATH:
0ba55302 810 case OPT_PRUNE:
811 case OPT_PRUNEPATH:
56fa1896 812 if (ninex >= inexsize) {
813 inexsize = ninex * 3 / 2 + 16;
814 inex = sresize(inex, inexsize,
815 struct inclusion_exclusion);
816 }
817 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
0ba55302 818 optid == OPT_EXCLUDEPATH ||
819 optid == OPT_PRUNEPATH);
820 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
821 optid == OPT_INCLUDEPATH ? 1 :
822 optid == OPT_EXCLUDE ? 0 :
823 optid == OPT_EXCLUDEPATH ? 0 :
824 optid == OPT_PRUNE ? -1 :
825 /* optid == OPT_PRUNEPATH ? */ -1);
56fa1896 826 inex[ninex].wildcard = optval;
827 ninex++;
828 break;
829 }
830 }
70322ae3 831 } else {
e9e7a1bf 832 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
833 return 1;
70322ae3 834 }
835 }
836
444c684c 837 if (nactions == 0) {
e9e7a1bf 838 usage(stderr);
839 return 1;
444c684c 840 }
841
842 for (action = 0; action < nactions; action++) {
843 int mode = actions[action].mode;
844
845 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
846 const char *scandir = actions[action].arg;
847 if (mode == LOAD) {
848 char *buf = fgetline(stdin);
849 unsigned newpathsep;
850 buf[strcspn(buf, "\r\n")] = '\0';
bf53e756 851 if (1 != sscanf(buf, DUMPHDR "%x",
444c684c 852 &newpathsep)) {
853 fprintf(stderr, "%s: header in dump file not recognised\n",
854 PNAME);
855 return 1;
856 }
857 pathsep = (char)newpathsep;
858 sfree(buf);
84849cbd 859 }
70322ae3 860
444c684c 861 if (mode == SCAN || mode == LOAD) {
862 /*
863 * Prepare to write out the index file.
864 */
865 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
866 if (fd < 0) {
867 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
868 strerror(errno));
869 return 1;
870 }
871 if (fstat(fd, &st) < 0) {
bf53e756 872 perror(PNAME ": fstat");
444c684c 873 return 1;
874 }
875 ctx->datafile_dev = st.st_dev;
876 ctx->datafile_ino = st.st_ino;
877 ctx->straight_to_dump = 0;
878 } else {
879 ctx->datafile_dev = -1;
880 ctx->datafile_ino = -1;
881 ctx->straight_to_dump = 1;
84849cbd 882 }
444c684c 883
884 if (mode == SCAN || mode == SCANDUMP) {
885 if (stat(scandir, &st) < 0) {
886 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
887 strerror(errno));
888 return 1;
889 }
890 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
84849cbd 891 }
70322ae3 892
444c684c 893 ctx->inex = inex;
894 ctx->ninex = ninex;
895 ctx->crossfs = crossfs;
896
897 ctx->last_output_update = time(NULL);
898
899 /* progress==1 means report progress only if stderr is a tty */
900 if (progress == 1)
901 progress = isatty(2) ? 2 : 0;
902 ctx->progress = progress;
903 {
904 struct winsize ws;
905 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
906 ctx->progwidth = ws.ws_col - 1;
907 else
908 ctx->progwidth = 79;
84849cbd 909 }
84849cbd 910
444c684c 911 if (mode == SCANDUMP)
bf53e756 912 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
8b1f55d6 913
444c684c 914 /*
915 * Scan the directory tree, and write out the trie component
916 * of the data file.
917 */
918 if (mode != SCANDUMP) {
919 ctx->tb = triebuild_new(fd);
920 }
921 if (mode == LOAD) {
922 char *buf;
923 int line = 2;
924 while ((buf = fgetline(stdin)) != NULL) {
925 struct trie_file tf;
926 char *p, *q;
927
928 buf[strcspn(buf, "\r\n")] = '\0';
929
930 p = buf;
931 q = p;
932 while (*p && *p != ' ') p++;
933 if (!*p) {
934 fprintf(stderr, "%s: dump file line %d: expected at least"
935 " three fields\n", PNAME, line);
936 return 1;
937 }
938 *p++ = '\0';
939 tf.size = strtoull(q, NULL, 10);
940 q = p;
941 while (*p && *p != ' ') p++;
942 if (!*p) {
943 fprintf(stderr, "%s: dump file line %d: expected at least"
944 " three fields\n", PNAME, line);
945 return 1;
946 }
947 *p++ = '\0';
948 tf.atime = strtoull(q, NULL, 10);
949 q = buf;
950 while (*p) {
951 int c = *p;
952 if (*p == '%') {
953 int i;
954 p++;
955 c = 0;
956 for (i = 0; i < 2; i++) {
957 if (*p >= '0' && *p <= '9')
958 c += *p - '0';
959 else if (*p >= 'A' && *p <= 'F')
960 c += *p - ('A' - 10);
961 else if (*p >= 'a' && *p <= 'f')
962 c += *p - ('a' - 10);
963 else {
964 fprintf(stderr, "%s: dump file line %d: unable"
965 " to parse hex escape\n", PNAME, line);
966 }
967 p++;
968 }
969 }
970 *q++ = c;
971 p++;
972 }
973 *q = '\0';
974 triebuild_add(ctx->tb, buf, &tf);
975 sfree(buf);
976 }
977 } else {
978 du(scandir, gotdata, ctx);
979 }
980 if (mode != SCANDUMP) {
981 count = triebuild_finish(ctx->tb);
982 triebuild_free(ctx->tb);
84849cbd 983
444c684c 984 if (ctx->progress) {
985 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
986 fflush(stderr);
987 }
84849cbd 988
444c684c 989 /*
990 * Work out how much space the cumulative index trees
991 * will take; enlarge the file, and memory-map it.
992 */
993 if (fstat(fd, &st) < 0) {
bf53e756 994 perror(PNAME ": fstat");
444c684c 995 return 1;
996 }
84849cbd 997
444c684c 998 printf("Built pathname index, %d entries, %ju bytes\n", count,
999 (intmax_t)st.st_size);
1000
1001 totalsize = index_compute_size(st.st_size, count);
1002
1003 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
bf53e756 1004 perror(PNAME ": lseek");
84849cbd 1005 return 1;
1006 }
444c684c 1007 if (write(fd, "\0", 1) < 1) {
bf53e756 1008 perror(PNAME ": write");
84849cbd 1009 return 1;
1010 }
444c684c 1011
1012 printf("Upper bound on index file size = %ju bytes\n",
1013 (intmax_t)totalsize);
1014
1015 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1016 if (!mappedfile) {
bf53e756 1017 perror(PNAME ": mmap");
444c684c 1018 return 1;
84849cbd 1019 }
444c684c 1020
1021 ib = indexbuild_new(mappedfile, st.st_size, count);
1022 tw = triewalk_new(mappedfile);
1023 while ((tf = triewalk_next(tw, NULL)) != NULL)
1024 indexbuild_add(ib, tf);
1025 triewalk_free(tw);
1026 realsize = indexbuild_realsize(ib);
1027 indexbuild_free(ib);
1028
1029 munmap(mappedfile, totalsize);
1030 ftruncate(fd, realsize);
1031 close(fd);
1032 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
84849cbd 1033 }
444c684c 1034 } else if (mode == TEXT) {
1035 char *querydir = actions[action].arg;
1036 size_t pathlen;
70322ae3 1037
444c684c 1038 fd = open(filename, O_RDONLY);
1039 if (fd < 0) {
1040 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1041 strerror(errno));
1042 return 1;
1043 }
1044 if (fstat(fd, &st) < 0) {
bf53e756 1045 perror(PNAME ": fstat");
444c684c 1046 return 1;
1047 }
1048 totalsize = st.st_size;
1049 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1050 if (!mappedfile) {
bf53e756 1051 perror(PNAME ": mmap");
444c684c 1052 return 1;
84849cbd 1053 }
444c684c 1054 pathsep = trie_pathsep(mappedfile);
70322ae3 1055
84849cbd 1056 /*
444c684c 1057 * Trim trailing slash, just in case.
84849cbd 1058 */
444c684c 1059 pathlen = strlen(querydir);
1060 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1061 querydir[--pathlen] = '\0';
1062
16e591d6 1063 text_query(mappedfile, querydir, textcutoff, tqdepth);
444c684c 1064 } else if (mode == HTML) {
1065 char *querydir = actions[action].arg;
1066 size_t pathlen;
1067 struct html_config cfg;
1068 unsigned long xi;
1069 char *html;
1070
1071 fd = open(filename, O_RDONLY);
1072 if (fd < 0) {
1073 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1074 strerror(errno));
1075 return 1;
1076 }
84849cbd 1077 if (fstat(fd, &st) < 0) {
bf53e756 1078 perror(PNAME ": fstat");
84849cbd 1079 return 1;
1080 }
444c684c 1081 totalsize = st.st_size;
1082 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1083 if (!mappedfile) {
bf53e756 1084 perror(PNAME ": mmap");
444c684c 1085 return 1;
1086 }
1087 pathsep = trie_pathsep(mappedfile);
70322ae3 1088
444c684c 1089 /*
1090 * Trim trailing slash, just in case.
1091 */
1092 pathlen = strlen(querydir);
1093 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1094 querydir[--pathlen] = '\0';
1095
1096 xi = trie_before(mappedfile, querydir);
1097 cfg.format = NULL;
1098 cfg.autoage = htmlautoagerange;
1099 cfg.oldest = htmloldest;
1100 cfg.newest = htmlnewest;
1101 html = html_query(mappedfile, xi, &cfg);
1102 fputs(html, stdout);
1103 } else if (mode == DUMP) {
1104 size_t maxpathlen;
1105 char *buf;
70322ae3 1106
444c684c 1107 fd = open(filename, O_RDONLY);
1108 if (fd < 0) {
1109 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1110 strerror(errno));
84849cbd 1111 return 1;
1112 }
444c684c 1113 if (fstat(fd, &st) < 0) {
bf53e756 1114 perror(PNAME ": fstat");
84849cbd 1115 return 1;
1116 }
444c684c 1117 totalsize = st.st_size;
1118 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
84849cbd 1119 if (!mappedfile) {
bf53e756 1120 perror(PNAME ": mmap");
84849cbd 1121 return 1;
1122 }
444c684c 1123 pathsep = trie_pathsep(mappedfile);
1124
1125 maxpathlen = trie_maxpathlen(mappedfile);
1126 buf = snewn(maxpathlen, char);
84849cbd 1127
bf53e756 1128 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
84849cbd 1129 tw = triewalk_new(mappedfile);
444c684c 1130 while ((tf = triewalk_next(tw, buf)) != NULL)
1131 dump_line(buf, tf);
84849cbd 1132 triewalk_free(tw);
444c684c 1133 } else if (mode == HTTPD) {
1134 struct html_config pcfg;
1135 struct httpd_config dcfg;
70322ae3 1136
444c684c 1137 fd = open(filename, O_RDONLY);
1138 if (fd < 0) {
1139 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1140 strerror(errno));
1141 return 1;
1142 }
1143 if (fstat(fd, &st) < 0) {
bf53e756 1144 perror(PNAME ": fstat");
444c684c 1145 return 1;
1146 }
1147 totalsize = st.st_size;
1148 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1149 if (!mappedfile) {
bf53e756 1150 perror(PNAME ": mmap");
444c684c 1151 return 1;
1152 }
1153 pathsep = trie_pathsep(mappedfile);
1154
1155 dcfg.address = httpserveraddr;
1156 dcfg.port = httpserverport;
1157 dcfg.basicauthdata = httpauthdata;
1158 pcfg.format = NULL;
1159 pcfg.autoage = htmlautoagerange;
1160 pcfg.oldest = htmloldest;
1161 pcfg.newest = htmlnewest;
1162 run_httpd(mappedfile, auth, &dcfg, &pcfg);
70322ae3 1163 }
70322ae3 1164 }
1165
1166 return 0;
1167}