Additional options to control the disk scanning: wildcard-based
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #define _GNU_SOURCE
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <time.h>
13
14 #include <unistd.h>
15 #include <sys/types.h>
16 #include <fcntl.h>
17 #include <sys/mman.h>
18 #include <termios.h>
19 #include <sys/ioctl.h>
20 #include <fnmatch.h>
21
22 #include "du.h"
23 #include "trie.h"
24 #include "index.h"
25 #include "malloc.h"
26 #include "html.h"
27 #include "httpd.h"
28
29 #define PNAME "agedu"
30
31 void fatal(const char *fmt, ...)
32 {
33 va_list ap;
34 fprintf(stderr, "%s: ", PNAME);
35 va_start(ap, fmt);
36 vfprintf(stderr, fmt, ap);
37 va_end(ap);
38 fprintf(stderr, "\n");
39 exit(1);
40 }
41
/*
 * One --include / --exclude style rule given on the command line.
 * Rules are kept in command-line order; when scanning, every rule
 * whose wildcard matches overrides the verdict of earlier ones.
 */
struct inclusion_exclusion {
    /* Nonzero: a match includes the file. Zero: a match excludes it. */
    int include;

    /* fnmatch() pattern, pointing at the option's argument string. */
    const char *wildcard;

    /*
     * Nonzero: match the pattern against the whole pathname.
     * Zero: match it against the final path component only.
     */
    int path;
};
47
48 struct ctx {
49 triebuild *tb;
50 dev_t datafile_dev, filesystem_dev;
51 ino_t datafile_ino;
52 time_t last_output_update;
53 int progress, progwidth;
54 struct inclusion_exclusion *inex;
55 int ninex;
56 int crossfs;
57 };
58
59 static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
60 {
61 struct ctx *ctx = (struct ctx *)vctx;
62 struct trie_file file;
63 time_t t;
64 int i, include;
65 const char *filename;
66
67 /*
68 * Filter out our own data file.
69 */
70 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
71 return 0;
72
73 /*
74 * Don't cross the streams^W^Wany file system boundary.
75 */
76 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
77 return 0;
78
79 /*
80 * Filter based on wildcards.
81 */
82 include = 1;
83 filename = strrchr(pathname, '/');
84 if (!filename)
85 filename = pathname;
86 else
87 filename++;
88 for (i = 0; i < ctx->ninex; i++) {
89 if (fnmatch(ctx->inex[i].wildcard,
90 ctx->inex[i].path ? pathname : filename,
91 FNM_PATHNAME) == 0)
92 include = ctx->inex[i].include;
93 }
94 if (!include)
95 return 1; /* filter, but don't prune */
96
97 file.blocks = st->st_blocks;
98 file.atime = st->st_atime;
99 triebuild_add(ctx->tb, pathname, &file);
100
101 t = time(NULL);
102 if (t != ctx->last_output_update) {
103 if (ctx->progress) {
104 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
105 pathname);
106 fflush(stderr);
107 }
108 ctx->last_output_update = t;
109 }
110
111 return 1;
112 }
113
114 static void run_query(const void *mappedfile, const char *rootdir,
115 time_t t, int depth)
116 {
117 size_t maxpathlen;
118 char *pathbuf;
119 unsigned long xi1, xi2;
120 unsigned long long s1, s2;
121
122 maxpathlen = trie_maxpathlen(mappedfile);
123 pathbuf = snewn(maxpathlen + 1, char);
124
125 /*
126 * We want to query everything between the supplied filename
127 * (inclusive) and that filename with a ^A on the end
128 * (exclusive). So find the x indices for each.
129 */
130 sprintf(pathbuf, "%s\001", rootdir);
131 xi1 = trie_before(mappedfile, rootdir);
132 xi2 = trie_before(mappedfile, pathbuf);
133
134 /*
135 * Now do the lookups in the age index.
136 */
137 s1 = index_query(mappedfile, xi1, t);
138 s2 = index_query(mappedfile, xi2, t);
139
140 /* Display in units of 2 512-byte blocks = 1Kb */
141 printf("%-11llu %s\n", (s2 - s1) / 2, rootdir);
142
143 if (depth > 0) {
144 /*
145 * Now scan for first-level subdirectories and report
146 * those too.
147 */
148 xi1++;
149 while (xi1 < xi2) {
150 trie_getpath(mappedfile, xi1, pathbuf);
151 run_query(mappedfile, pathbuf, t, depth-1);
152 strcat(pathbuf, "\001");
153 xi1 = trie_before(mappedfile, pathbuf);
154 }
155 }
156 }
157
158 int main(int argc, char **argv)
159 {
160 int fd, count;
161 struct ctx actx, *ctx = &actx;
162 struct stat st;
163 off_t totalsize, realsize;
164 void *mappedfile;
165 triewalk *tw;
166 indexbuild *ib;
167 const struct trie_file *tf;
168 char *filename = "agedu.dat";
169 char *rootdir = NULL;
170 int doing_opts = 1;
171 enum { QUERY, HTML, SCAN, DUMP, HTTPD } mode = QUERY;
172 char *minage = "0d";
173 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
174 int progress = 1;
175 struct inclusion_exclusion *inex = NULL;
176 int ninex = 0, inexsize = 0;
177 int crossfs = 0;
178
179 while (--argc > 0) {
180 char *p = *++argv;
181 char *optval;
182
183 if (doing_opts && *p == '-') {
184 if (!strcmp(p, "--")) {
185 doing_opts = 0;
186 } else if (p[1] == '-') {
187 char *optval = strchr(p, '=');
188 if (optval)
189 *optval++ = '\0';
190 if (!strcmp(p, "--help")) {
191 printf("FIXME: usage();\n");
192 return 0;
193 } else if (!strcmp(p, "--version")) {
194 printf("FIXME: version();\n");
195 return 0;
196 } else if (!strcmp(p, "--licence") ||
197 !strcmp(p, "--license")) {
198 printf("FIXME: licence();\n");
199 return 0;
200 } else if (!strcmp(p, "--scan")) {
201 mode = SCAN;
202 } else if (!strcmp(p, "--dump")) {
203 mode = DUMP;
204 } else if (!strcmp(p, "--html")) {
205 mode = HTML;
206 } else if (!strcmp(p, "--httpd") ||
207 !strcmp(p, "--server")) {
208 mode = HTTPD;
209 } else if (!strcmp(p, "--progress") ||
210 !strcmp(p, "--scan-progress")) {
211 progress = 2;
212 } else if (!strcmp(p, "--no-progress") ||
213 !strcmp(p, "--no-scan-progress")) {
214 progress = 0;
215 } else if (!strcmp(p, "--tty-progress") ||
216 !strcmp(p, "--tty-scan-progress") ||
217 !strcmp(p, "--progress-tty") ||
218 !strcmp(p, "--scan-progress-tty")) {
219 progress = 1;
220 } else if (!strcmp(p, "--crossfs")) {
221 crossfs = 1;
222 } else if (!strcmp(p, "--no-crossfs")) {
223 crossfs = 0;
224 } else if (!strcmp(p, "--file") ||
225 !strcmp(p, "--auth") ||
226 !strcmp(p, "--http-auth") ||
227 !strcmp(p, "--httpd-auth") ||
228 !strcmp(p, "--server-auth") ||
229 !strcmp(p, "--minimum-age") ||
230 !strcmp(p, "--min-age") ||
231 !strcmp(p, "--age") ||
232 !strcmp(p, "--include") ||
233 !strcmp(p, "--include-path") ||
234 !strcmp(p, "--exclude") ||
235 !strcmp(p, "--exclude-path")) {
236 /*
237 * Long options requiring values.
238 */
239 if (!optval) {
240 if (--argc > 0) {
241 optval = *++argv;
242 } else {
243 fprintf(stderr, "%s: option '%s' requires"
244 " an argument\n", PNAME, p);
245 return 1;
246 }
247 }
248 if (!strcmp(p, "--file")) {
249 filename = optval;
250 } else if (!strcmp(p, "--minimum-age") ||
251 !strcmp(p, "--min-age") ||
252 !strcmp(p, "--age")) {
253 minage = optval;
254 } else if (!strcmp(p, "--auth") ||
255 !strcmp(p, "--http-auth") ||
256 !strcmp(p, "--httpd-auth") ||
257 !strcmp(p, "--server-auth")) {
258 if (!strcmp(optval, "magic"))
259 auth = HTTPD_AUTH_MAGIC;
260 else if (!strcmp(optval, "basic"))
261 auth = HTTPD_AUTH_BASIC;
262 else if (!strcmp(optval, "none"))
263 auth = HTTPD_AUTH_NONE;
264 else if (!strcmp(optval, "default"))
265 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
266 else {
267 fprintf(stderr, "%s: unrecognised authentication"
268 " type '%s'\n%*s options are 'magic',"
269 " 'basic', 'none', 'default'\n",
270 PNAME, optval, (int)strlen(PNAME), "");
271 return 1;
272 }
273 } else if (!strcmp(p, "--include") ||
274 !strcmp(p, "--include-path") ||
275 !strcmp(p, "--exclude") ||
276 !strcmp(p, "--exclude-path")) {
277 if (ninex >= inexsize) {
278 inexsize = ninex * 3 / 2 + 16;
279 inex = sresize(inex, inexsize,
280 struct inclusion_exclusion);
281 }
282 inex[ninex].path = (!strcmp(p, "--include-path") ||
283 !strcmp(p, "--exclude-path"));
284 inex[ninex].include = (!strcmp(p, "--include") ||
285 !strcmp(p, "--include-path"));
286 inex[ninex].wildcard = optval;
287 ninex++;
288 }
289 } else {
290 fprintf(stderr, "%s: unrecognised option '%s'\n",
291 PNAME, p);
292 return 1;
293 }
294 } else {
295 p++;
296 while (*p) {
297 char c = *p++;
298
299 switch (c) {
300 /* Options requiring arguments. */
301 case 'f':
302 case 'a':
303 if (*p) {
304 optval = p;
305 p += strlen(p);
306 } else if (--argc > 0) {
307 optval = *++argv;
308 } else {
309 fprintf(stderr, "%s: option '-%c' requires"
310 " an argument\n", PNAME, c);
311 return 1;
312 }
313 switch (c) {
314 case 'f': /* data file name */
315 filename = optval;
316 break;
317 case 'a': /* maximum age */
318 minage = optval;
319 break;
320 }
321 break;
322 case 's':
323 mode = SCAN;
324 break;
325 default:
326 fprintf(stderr, "%s: unrecognised option '-%c'\n",
327 PNAME, c);
328 return 1;
329 }
330 }
331 }
332 } else {
333 if (!rootdir) {
334 rootdir = p;
335 } else {
336 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
337 return 1;
338 }
339 }
340 }
341
342 if (!rootdir)
343 rootdir = ".";
344
345 if (mode == SCAN) {
346
347 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
348 if (fd < 0) {
349 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
350 strerror(errno));
351 return 1;
352 }
353
354 if (stat(rootdir, &st) < 0) {
355 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir,
356 strerror(errno));
357 return 1;
358 }
359 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
360
361 if (fstat(fd, &st) < 0) {
362 perror("agedu: fstat");
363 return 1;
364 }
365 ctx->datafile_dev = st.st_dev;
366 ctx->datafile_ino = st.st_ino;
367 ctx->inex = inex;
368 ctx->ninex = ninex;
369 ctx->crossfs = crossfs;
370
371 ctx->last_output_update = time(NULL);
372
373 /* progress==1 means report progress only if stderr is a tty */
374 if (progress == 1)
375 progress = isatty(2) ? 2 : 0;
376 ctx->progress = progress;
377 {
378 struct winsize ws;
379 if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0)
380 ctx->progwidth = ws.ws_col - 1;
381 else
382 ctx->progwidth = 79;
383 }
384
385 /*
386 * Scan the directory tree, and write out the trie component
387 * of the data file.
388 */
389 ctx->tb = triebuild_new(fd);
390 du(rootdir, gotdata, ctx);
391 count = triebuild_finish(ctx->tb);
392 triebuild_free(ctx->tb);
393
394 if (ctx->progress) {
395 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
396 fflush(stderr);
397 }
398
399 /*
400 * Work out how much space the cumulative index trees will
401 * take; enlarge the file, and memory-map it.
402 */
403 if (fstat(fd, &st) < 0) {
404 perror("agedu: fstat");
405 return 1;
406 }
407
408 printf("Built pathname index, %d entries, %ju bytes\n", count,
409 (intmax_t)st.st_size);
410
411 totalsize = index_compute_size(st.st_size, count);
412
413 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
414 perror("agedu: lseek");
415 return 1;
416 }
417 if (write(fd, "\0", 1) < 1) {
418 perror("agedu: write");
419 return 1;
420 }
421
422 printf("Upper bound on index file size = %ju bytes\n",
423 (intmax_t)totalsize);
424
425 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
426 if (!mappedfile) {
427 perror("agedu: mmap");
428 return 1;
429 }
430
431 ib = indexbuild_new(mappedfile, st.st_size, count);
432 tw = triewalk_new(mappedfile);
433 while ((tf = triewalk_next(tw, NULL)) != NULL)
434 indexbuild_add(ib, tf);
435 triewalk_free(tw);
436 realsize = indexbuild_realsize(ib);
437 indexbuild_free(ib);
438
439 munmap(mappedfile, totalsize);
440 ftruncate(fd, realsize);
441 close(fd);
442 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
443 } else if (mode == QUERY) {
444 time_t t;
445 struct tm tm;
446 int nunits;
447 char unit[2];
448 size_t pathlen;
449
450 t = time(NULL);
451
452 if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) {
453 fprintf(stderr, "%s: minimum age should be a number followed by"
454 " one of d,w,m,y\n", PNAME);
455 return 1;
456 }
457
458 if (unit[0] == 'd') {
459 t -= 86400 * nunits;
460 } else if (unit[0] == 'w') {
461 t -= 86400 * 7 * nunits;
462 } else {
463 int ym;
464
465 tm = *localtime(&t);
466 ym = tm.tm_year * 12 + tm.tm_mon;
467
468 if (unit[0] == 'm')
469 ym -= nunits;
470 else
471 ym -= 12 * nunits;
472
473 tm.tm_year = ym / 12;
474 tm.tm_mon = ym % 12;
475
476 t = mktime(&tm);
477 }
478
479 fd = open(filename, O_RDONLY);
480 if (fd < 0) {
481 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
482 strerror(errno));
483 return 1;
484 }
485 if (fstat(fd, &st) < 0) {
486 perror("agedu: fstat");
487 return 1;
488 }
489 totalsize = st.st_size;
490 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
491 if (!mappedfile) {
492 perror("agedu: mmap");
493 return 1;
494 }
495
496 /*
497 * Trim trailing slash, just in case.
498 */
499 pathlen = strlen(rootdir);
500 if (pathlen > 0 && rootdir[pathlen-1] == '/')
501 rootdir[--pathlen] = '\0';
502
503 run_query(mappedfile, rootdir, t, 1);
504 } else if (mode == HTML) {
505 size_t pathlen;
506 unsigned long xi;
507 char *html;
508
509 fd = open(filename, O_RDONLY);
510 if (fd < 0) {
511 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
512 strerror(errno));
513 return 1;
514 }
515 if (fstat(fd, &st) < 0) {
516 perror("agedu: fstat");
517 return 1;
518 }
519 totalsize = st.st_size;
520 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
521 if (!mappedfile) {
522 perror("agedu: mmap");
523 return 1;
524 }
525
526 /*
527 * Trim trailing slash, just in case.
528 */
529 pathlen = strlen(rootdir);
530 if (pathlen > 0 && rootdir[pathlen-1] == '/')
531 rootdir[--pathlen] = '\0';
532
533 xi = trie_before(mappedfile, rootdir);
534 html = html_query(mappedfile, xi, NULL);
535 fputs(html, stdout);
536 } else if (mode == DUMP) {
537 size_t maxpathlen;
538 char *buf;
539
540 fd = open(filename, O_RDONLY);
541 if (fd < 0) {
542 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
543 strerror(errno));
544 return 1;
545 }
546 if (fstat(fd, &st) < 0) {
547 perror("agedu: fstat");
548 return 1;
549 }
550 totalsize = st.st_size;
551 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
552 if (!mappedfile) {
553 perror("agedu: mmap");
554 return 1;
555 }
556
557 maxpathlen = trie_maxpathlen(mappedfile);
558 buf = snewn(maxpathlen, char);
559
560 tw = triewalk_new(mappedfile);
561 while ((tf = triewalk_next(tw, buf)) != NULL) {
562 printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime);
563 }
564 triewalk_free(tw);
565 } else if (mode == HTTPD) {
566 fd = open(filename, O_RDONLY);
567 if (fd < 0) {
568 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
569 strerror(errno));
570 return 1;
571 }
572 if (fstat(fd, &st) < 0) {
573 perror("agedu: fstat");
574 return 1;
575 }
576 totalsize = st.st_size;
577 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
578 if (!mappedfile) {
579 perror("agedu: mmap");
580 return 1;
581 }
582
583 run_httpd(mappedfile, auth);
584 }
585
586 return 0;
587 }