70322ae3 |
1 | /* |
2 | * Main program for agedu. |
3 | */ |
4 | |
5 | #define _GNU_SOURCE |
6 | #include <stdio.h> |
7 | #include <errno.h> |
8 | #include <stdarg.h> |
9 | #include <stdlib.h> |
10 | #include <stdint.h> |
11 | #include <string.h> |
12 | #include <time.h> |
13 | |
14 | #include <unistd.h> |
15 | #include <sys/types.h> |
16 | #include <fcntl.h> |
17 | #include <sys/mman.h> |
8b1f55d6 |
18 | #include <termios.h> |
19 | #include <sys/ioctl.h> |
9d0b9596 |
20 | #include <fnmatch.h> |
70322ae3 |
21 | |
22 | #include "du.h" |
23 | #include "trie.h" |
24 | #include "index.h" |
25 | #include "malloc.h" |
26 | #include "html.h" |
27 | #include "httpd.h" |
28 | |
#define PNAME "agedu"

/*
 * Print a printf-style error message to stderr, prefixed with the
 * program name and followed by a newline, then terminate the process
 * with exit status 1. Never returns.
 */
void fatal(const char *fmt, ...)
{
    va_list args;

    fputs(PNAME ": ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
    exit(1);
}
41 | |
9d0b9596 |
/*
 * One --include / --exclude rule supplied on the command line.
 * Rules are applied in order by gotdata(); the last matching rule
 * decides whether a file is recorded.
 */
struct inclusion_exclusion {
    /* Nonzero for an include rule, zero for an exclude rule. */
    int include;
    /* fnmatch() pattern; points at the option's value string. */
    const char *wildcard;
    /*
     * Nonzero if the pattern is matched against the whole pathname
     * (--include-path / --exclude-path), zero if it is matched
     * against the final path component only.
     */
    int path;
};
47 | |
70322ae3 |
48 | struct ctx { |
49 | triebuild *tb; |
50 | dev_t datafile_dev, filesystem_dev; |
51 | ino_t datafile_ino; |
52 | time_t last_output_update; |
8b1f55d6 |
53 | int progress, progwidth; |
9d0b9596 |
54 | struct inclusion_exclusion *inex; |
55 | int ninex; |
56 | int crossfs; |
70322ae3 |
57 | }; |
58 | |
59 | static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) |
60 | { |
61 | struct ctx *ctx = (struct ctx *)vctx; |
62 | struct trie_file file; |
63 | time_t t; |
9d0b9596 |
64 | int i, include; |
65 | const char *filename; |
70322ae3 |
66 | |
67 | /* |
68 | * Filter out our own data file. |
69 | */ |
70 | if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino) |
71 | return 0; |
72 | |
73 | /* |
74 | * Don't cross the streams^W^Wany file system boundary. |
70322ae3 |
75 | */ |
9d0b9596 |
76 | if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev) |
70322ae3 |
77 | return 0; |
78 | |
79 | /* |
9d0b9596 |
80 | * Filter based on wildcards. |
70322ae3 |
81 | */ |
9d0b9596 |
82 | include = 1; |
83 | filename = strrchr(pathname, '/'); |
84 | if (!filename) |
85 | filename = pathname; |
86 | else |
87 | filename++; |
88 | for (i = 0; i < ctx->ninex; i++) { |
89 | if (fnmatch(ctx->inex[i].wildcard, |
90 | ctx->inex[i].path ? pathname : filename, |
91 | FNM_PATHNAME) == 0) |
92 | include = ctx->inex[i].include; |
93 | } |
94 | if (!include) |
95 | return 1; /* filter, but don't prune */ |
70322ae3 |
96 | |
97 | file.blocks = st->st_blocks; |
98 | file.atime = st->st_atime; |
99 | triebuild_add(ctx->tb, pathname, &file); |
100 | |
101 | t = time(NULL); |
102 | if (t != ctx->last_output_update) { |
8b1f55d6 |
103 | if (ctx->progress) { |
104 | fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth, |
105 | pathname); |
106 | fflush(stderr); |
107 | } |
70322ae3 |
108 | ctx->last_output_update = t; |
109 | } |
110 | |
111 | return 1; |
112 | } |
113 | |
7cf11b75 |
114 | static void text_query(const void *mappedfile, const char *rootdir, |
115 | time_t t, int depth) |
70322ae3 |
116 | { |
117 | size_t maxpathlen; |
118 | char *pathbuf; |
119 | unsigned long xi1, xi2; |
120 | unsigned long long s1, s2; |
121 | |
122 | maxpathlen = trie_maxpathlen(mappedfile); |
123 | pathbuf = snewn(maxpathlen + 1, char); |
124 | |
125 | /* |
126 | * We want to query everything between the supplied filename |
127 | * (inclusive) and that filename with a ^A on the end |
128 | * (exclusive). So find the x indices for each. |
129 | */ |
130 | sprintf(pathbuf, "%s\001", rootdir); |
131 | xi1 = trie_before(mappedfile, rootdir); |
132 | xi2 = trie_before(mappedfile, pathbuf); |
133 | |
134 | /* |
135 | * Now do the lookups in the age index. |
136 | */ |
137 | s1 = index_query(mappedfile, xi1, t); |
138 | s2 = index_query(mappedfile, xi2, t); |
139 | |
140 | /* Display in units of 2 512-byte blocks = 1Kb */ |
141 | printf("%-11llu %s\n", (s2 - s1) / 2, rootdir); |
142 | |
143 | if (depth > 0) { |
144 | /* |
145 | * Now scan for first-level subdirectories and report |
146 | * those too. |
147 | */ |
148 | xi1++; |
149 | while (xi1 < xi2) { |
150 | trie_getpath(mappedfile, xi1, pathbuf); |
7cf11b75 |
151 | text_query(mappedfile, pathbuf, t, depth-1); |
70322ae3 |
152 | strcat(pathbuf, "\001"); |
153 | xi1 = trie_before(mappedfile, pathbuf); |
154 | } |
155 | } |
156 | } |
157 | |
158 | int main(int argc, char **argv) |
159 | { |
160 | int fd, count; |
161 | struct ctx actx, *ctx = &actx; |
162 | struct stat st; |
163 | off_t totalsize, realsize; |
164 | void *mappedfile; |
165 | triewalk *tw; |
166 | indexbuild *ib; |
167 | const struct trie_file *tf; |
168 | char *filename = "agedu.dat"; |
169 | char *rootdir = NULL; |
170 | int doing_opts = 1; |
7cf11b75 |
171 | enum { USAGE, TEXT, HTML, SCAN, DUMP, HTTPD } mode = USAGE; |
70322ae3 |
172 | char *minage = "0d"; |
812e4bf2 |
173 | int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; |
8b1f55d6 |
174 | int progress = 1; |
9d0b9596 |
175 | struct inclusion_exclusion *inex = NULL; |
176 | int ninex = 0, inexsize = 0; |
177 | int crossfs = 0; |
70322ae3 |
178 | |
179 | while (--argc > 0) { |
180 | char *p = *++argv; |
181 | char *optval; |
182 | |
183 | if (doing_opts && *p == '-') { |
184 | if (!strcmp(p, "--")) { |
185 | doing_opts = 0; |
186 | } else if (p[1] == '-') { |
187 | char *optval = strchr(p, '='); |
188 | if (optval) |
189 | *optval++ = '\0'; |
190 | if (!strcmp(p, "--help")) { |
191 | printf("FIXME: usage();\n"); |
192 | return 0; |
193 | } else if (!strcmp(p, "--version")) { |
194 | printf("FIXME: version();\n"); |
195 | return 0; |
196 | } else if (!strcmp(p, "--licence") || |
197 | !strcmp(p, "--license")) { |
198 | printf("FIXME: licence();\n"); |
199 | return 0; |
200 | } else if (!strcmp(p, "--scan")) { |
201 | mode = SCAN; |
202 | } else if (!strcmp(p, "--dump")) { |
203 | mode = DUMP; |
7cf11b75 |
204 | } else if (!strcmp(p, "--text")) { |
205 | mode = TEXT; |
70322ae3 |
206 | } else if (!strcmp(p, "--html")) { |
207 | mode = HTML; |
208 | } else if (!strcmp(p, "--httpd") || |
209 | !strcmp(p, "--server")) { |
210 | mode = HTTPD; |
8b1f55d6 |
211 | } else if (!strcmp(p, "--progress") || |
212 | !strcmp(p, "--scan-progress")) { |
213 | progress = 2; |
214 | } else if (!strcmp(p, "--no-progress") || |
215 | !strcmp(p, "--no-scan-progress")) { |
216 | progress = 0; |
217 | } else if (!strcmp(p, "--tty-progress") || |
218 | !strcmp(p, "--tty-scan-progress") || |
219 | !strcmp(p, "--progress-tty") || |
220 | !strcmp(p, "--scan-progress-tty")) { |
221 | progress = 1; |
9d0b9596 |
222 | } else if (!strcmp(p, "--crossfs")) { |
223 | crossfs = 1; |
224 | } else if (!strcmp(p, "--no-crossfs")) { |
225 | crossfs = 0; |
70322ae3 |
226 | } else if (!strcmp(p, "--file") || |
812e4bf2 |
227 | !strcmp(p, "--auth") || |
228 | !strcmp(p, "--http-auth") || |
229 | !strcmp(p, "--httpd-auth") || |
230 | !strcmp(p, "--server-auth") || |
70322ae3 |
231 | !strcmp(p, "--minimum-age") || |
232 | !strcmp(p, "--min-age") || |
9d0b9596 |
233 | !strcmp(p, "--age") || |
234 | !strcmp(p, "--include") || |
235 | !strcmp(p, "--include-path") || |
236 | !strcmp(p, "--exclude") || |
237 | !strcmp(p, "--exclude-path")) { |
70322ae3 |
238 | /* |
239 | * Long options requiring values. |
240 | */ |
241 | if (!optval) { |
242 | if (--argc > 0) { |
243 | optval = *++argv; |
244 | } else { |
245 | fprintf(stderr, "%s: option '%s' requires" |
246 | " an argument\n", PNAME, p); |
247 | return 1; |
248 | } |
249 | } |
250 | if (!strcmp(p, "--file")) { |
251 | filename = optval; |
252 | } else if (!strcmp(p, "--minimum-age") || |
253 | !strcmp(p, "--min-age") || |
254 | !strcmp(p, "--age")) { |
255 | minage = optval; |
812e4bf2 |
256 | } else if (!strcmp(p, "--auth") || |
257 | !strcmp(p, "--http-auth") || |
258 | !strcmp(p, "--httpd-auth") || |
259 | !strcmp(p, "--server-auth")) { |
260 | if (!strcmp(optval, "magic")) |
261 | auth = HTTPD_AUTH_MAGIC; |
262 | else if (!strcmp(optval, "basic")) |
263 | auth = HTTPD_AUTH_BASIC; |
264 | else if (!strcmp(optval, "none")) |
265 | auth = HTTPD_AUTH_NONE; |
266 | else if (!strcmp(optval, "default")) |
267 | auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; |
268 | else { |
269 | fprintf(stderr, "%s: unrecognised authentication" |
270 | " type '%s'\n%*s options are 'magic'," |
271 | " 'basic', 'none', 'default'\n", |
272 | PNAME, optval, (int)strlen(PNAME), ""); |
273 | return 1; |
274 | } |
9d0b9596 |
275 | } else if (!strcmp(p, "--include") || |
276 | !strcmp(p, "--include-path") || |
277 | !strcmp(p, "--exclude") || |
278 | !strcmp(p, "--exclude-path")) { |
279 | if (ninex >= inexsize) { |
280 | inexsize = ninex * 3 / 2 + 16; |
281 | inex = sresize(inex, inexsize, |
282 | struct inclusion_exclusion); |
283 | } |
284 | inex[ninex].path = (!strcmp(p, "--include-path") || |
285 | !strcmp(p, "--exclude-path")); |
286 | inex[ninex].include = (!strcmp(p, "--include") || |
287 | !strcmp(p, "--include-path")); |
288 | inex[ninex].wildcard = optval; |
289 | ninex++; |
70322ae3 |
290 | } |
291 | } else { |
292 | fprintf(stderr, "%s: unrecognised option '%s'\n", |
293 | PNAME, p); |
294 | return 1; |
295 | } |
296 | } else { |
297 | p++; |
298 | while (*p) { |
299 | char c = *p++; |
300 | |
301 | switch (c) { |
302 | /* Options requiring arguments. */ |
303 | case 'f': |
304 | case 'a': |
305 | if (*p) { |
306 | optval = p; |
307 | p += strlen(p); |
308 | } else if (--argc > 0) { |
309 | optval = *++argv; |
310 | } else { |
311 | fprintf(stderr, "%s: option '-%c' requires" |
312 | " an argument\n", PNAME, c); |
313 | return 1; |
314 | } |
315 | switch (c) { |
316 | case 'f': /* data file name */ |
317 | filename = optval; |
318 | break; |
319 | case 'a': /* maximum age */ |
320 | minage = optval; |
321 | break; |
322 | } |
323 | break; |
324 | case 's': |
325 | mode = SCAN; |
326 | break; |
327 | default: |
328 | fprintf(stderr, "%s: unrecognised option '-%c'\n", |
329 | PNAME, c); |
330 | return 1; |
331 | } |
332 | } |
333 | } |
334 | } else { |
335 | if (!rootdir) { |
336 | rootdir = p; |
337 | } else { |
338 | fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p); |
339 | return 1; |
340 | } |
341 | } |
342 | } |
343 | |
344 | if (!rootdir) |
345 | rootdir = "."; |
346 | |
7cf11b75 |
347 | if (mode == USAGE) { |
348 | printf("FIXME: usage();\n"); |
349 | return 0; |
350 | } else if (mode == SCAN) { |
70322ae3 |
351 | |
352 | fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); |
353 | if (fd < 0) { |
354 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
355 | strerror(errno)); |
356 | return 1; |
357 | } |
358 | |
359 | if (stat(rootdir, &st) < 0) { |
360 | fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir, |
361 | strerror(errno)); |
362 | return 1; |
363 | } |
9d0b9596 |
364 | ctx->filesystem_dev = crossfs ? 0 : st.st_dev; |
70322ae3 |
365 | |
366 | if (fstat(fd, &st) < 0) { |
367 | perror("agedu: fstat"); |
368 | return 1; |
369 | } |
370 | ctx->datafile_dev = st.st_dev; |
371 | ctx->datafile_ino = st.st_ino; |
9d0b9596 |
372 | ctx->inex = inex; |
373 | ctx->ninex = ninex; |
374 | ctx->crossfs = crossfs; |
70322ae3 |
375 | |
376 | ctx->last_output_update = time(NULL); |
377 | |
8b1f55d6 |
378 | /* progress==1 means report progress only if stderr is a tty */ |
379 | if (progress == 1) |
380 | progress = isatty(2) ? 2 : 0; |
381 | ctx->progress = progress; |
382 | { |
383 | struct winsize ws; |
384 | if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0) |
385 | ctx->progwidth = ws.ws_col - 1; |
386 | else |
387 | ctx->progwidth = 79; |
388 | } |
389 | |
70322ae3 |
390 | /* |
391 | * Scan the directory tree, and write out the trie component |
392 | * of the data file. |
393 | */ |
394 | ctx->tb = triebuild_new(fd); |
395 | du(rootdir, gotdata, ctx); |
396 | count = triebuild_finish(ctx->tb); |
397 | triebuild_free(ctx->tb); |
398 | |
8b1f55d6 |
399 | if (ctx->progress) { |
400 | fprintf(stderr, "%-*s\r", ctx->progwidth, ""); |
401 | fflush(stderr); |
402 | } |
70322ae3 |
403 | |
404 | /* |
405 | * Work out how much space the cumulative index trees will |
406 | * take; enlarge the file, and memory-map it. |
407 | */ |
408 | if (fstat(fd, &st) < 0) { |
409 | perror("agedu: fstat"); |
410 | return 1; |
411 | } |
412 | |
413 | printf("Built pathname index, %d entries, %ju bytes\n", count, |
414 | (intmax_t)st.st_size); |
415 | |
416 | totalsize = index_compute_size(st.st_size, count); |
417 | |
418 | if (lseek(fd, totalsize-1, SEEK_SET) < 0) { |
419 | perror("agedu: lseek"); |
420 | return 1; |
421 | } |
422 | if (write(fd, "\0", 1) < 1) { |
423 | perror("agedu: write"); |
424 | return 1; |
425 | } |
426 | |
427 | printf("Upper bound on index file size = %ju bytes\n", |
428 | (intmax_t)totalsize); |
429 | |
430 | mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); |
431 | if (!mappedfile) { |
432 | perror("agedu: mmap"); |
433 | return 1; |
434 | } |
435 | |
436 | ib = indexbuild_new(mappedfile, st.st_size, count); |
437 | tw = triewalk_new(mappedfile); |
438 | while ((tf = triewalk_next(tw, NULL)) != NULL) |
439 | indexbuild_add(ib, tf); |
440 | triewalk_free(tw); |
441 | realsize = indexbuild_realsize(ib); |
442 | indexbuild_free(ib); |
443 | |
444 | munmap(mappedfile, totalsize); |
445 | ftruncate(fd, realsize); |
446 | close(fd); |
447 | printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); |
7cf11b75 |
448 | } else if (mode == TEXT) { |
70322ae3 |
449 | time_t t; |
450 | struct tm tm; |
451 | int nunits; |
452 | char unit[2]; |
453 | size_t pathlen; |
454 | |
455 | t = time(NULL); |
456 | |
457 | if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) { |
458 | fprintf(stderr, "%s: minimum age should be a number followed by" |
459 | " one of d,w,m,y\n", PNAME); |
460 | return 1; |
461 | } |
462 | |
463 | if (unit[0] == 'd') { |
464 | t -= 86400 * nunits; |
465 | } else if (unit[0] == 'w') { |
466 | t -= 86400 * 7 * nunits; |
467 | } else { |
468 | int ym; |
469 | |
470 | tm = *localtime(&t); |
471 | ym = tm.tm_year * 12 + tm.tm_mon; |
472 | |
473 | if (unit[0] == 'm') |
474 | ym -= nunits; |
475 | else |
476 | ym -= 12 * nunits; |
477 | |
478 | tm.tm_year = ym / 12; |
479 | tm.tm_mon = ym % 12; |
480 | |
481 | t = mktime(&tm); |
482 | } |
483 | |
484 | fd = open(filename, O_RDONLY); |
485 | if (fd < 0) { |
486 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
487 | strerror(errno)); |
488 | return 1; |
489 | } |
490 | if (fstat(fd, &st) < 0) { |
491 | perror("agedu: fstat"); |
492 | return 1; |
493 | } |
494 | totalsize = st.st_size; |
495 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
496 | if (!mappedfile) { |
497 | perror("agedu: mmap"); |
498 | return 1; |
499 | } |
500 | |
501 | /* |
502 | * Trim trailing slash, just in case. |
503 | */ |
504 | pathlen = strlen(rootdir); |
505 | if (pathlen > 0 && rootdir[pathlen-1] == '/') |
506 | rootdir[--pathlen] = '\0'; |
507 | |
7cf11b75 |
508 | text_query(mappedfile, rootdir, t, 1); |
70322ae3 |
509 | } else if (mode == HTML) { |
510 | size_t pathlen; |
511 | unsigned long xi; |
512 | char *html; |
513 | |
514 | fd = open(filename, O_RDONLY); |
515 | if (fd < 0) { |
516 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
517 | strerror(errno)); |
518 | return 1; |
519 | } |
520 | if (fstat(fd, &st) < 0) { |
521 | perror("agedu: fstat"); |
522 | return 1; |
523 | } |
524 | totalsize = st.st_size; |
525 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
526 | if (!mappedfile) { |
527 | perror("agedu: mmap"); |
528 | return 1; |
529 | } |
530 | |
531 | /* |
532 | * Trim trailing slash, just in case. |
533 | */ |
534 | pathlen = strlen(rootdir); |
535 | if (pathlen > 0 && rootdir[pathlen-1] == '/') |
536 | rootdir[--pathlen] = '\0'; |
537 | |
538 | xi = trie_before(mappedfile, rootdir); |
539 | html = html_query(mappedfile, xi, NULL); |
540 | fputs(html, stdout); |
541 | } else if (mode == DUMP) { |
542 | size_t maxpathlen; |
543 | char *buf; |
544 | |
545 | fd = open(filename, O_RDONLY); |
546 | if (fd < 0) { |
547 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
548 | strerror(errno)); |
549 | return 1; |
550 | } |
551 | if (fstat(fd, &st) < 0) { |
552 | perror("agedu: fstat"); |
553 | return 1; |
554 | } |
555 | totalsize = st.st_size; |
556 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
557 | if (!mappedfile) { |
558 | perror("agedu: mmap"); |
559 | return 1; |
560 | } |
561 | |
562 | maxpathlen = trie_maxpathlen(mappedfile); |
563 | buf = snewn(maxpathlen, char); |
564 | |
565 | tw = triewalk_new(mappedfile); |
566 | while ((tf = triewalk_next(tw, buf)) != NULL) { |
567 | printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime); |
568 | } |
569 | triewalk_free(tw); |
570 | } else if (mode == HTTPD) { |
571 | fd = open(filename, O_RDONLY); |
572 | if (fd < 0) { |
573 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
574 | strerror(errno)); |
575 | return 1; |
576 | } |
577 | if (fstat(fd, &st) < 0) { |
578 | perror("agedu: fstat"); |
579 | return 1; |
580 | } |
581 | totalsize = st.st_size; |
582 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
583 | if (!mappedfile) { |
584 | perror("agedu: mmap"); |
585 | return 1; |
586 | } |
587 | |
812e4bf2 |
588 | run_httpd(mappedfile, auth); |
70322ae3 |
589 | } |
590 | |
591 | return 0; |
592 | } |