70322ae3 |
1 | /* |
2 | * Main program for agedu. |
3 | */ |
4 | |
5 | #define _GNU_SOURCE |
6 | #include <stdio.h> |
7 | #include <errno.h> |
8 | #include <stdarg.h> |
9 | #include <stdlib.h> |
10 | #include <stdint.h> |
11 | #include <string.h> |
12 | #include <time.h> |
13 | |
14 | #include <unistd.h> |
15 | #include <sys/types.h> |
16 | #include <fcntl.h> |
17 | #include <sys/mman.h> |
8b1f55d6 |
18 | #include <termios.h> |
19 | #include <sys/ioctl.h> |
9d0b9596 |
20 | #include <fnmatch.h> |
70322ae3 |
21 | |
22 | #include "du.h" |
23 | #include "trie.h" |
24 | #include "index.h" |
25 | #include "malloc.h" |
26 | #include "html.h" |
27 | #include "httpd.h" |
28 | |
29 | #define PNAME "agedu" |
30 | |
31 | void fatal(const char *fmt, ...) |
32 | { |
33 | va_list ap; |
34 | fprintf(stderr, "%s: ", PNAME); |
35 | va_start(ap, fmt); |
36 | vfprintf(stderr, fmt, ap); |
37 | va_end(ap); |
38 | fprintf(stderr, "\n"); |
39 | exit(1); |
40 | } |
41 | |
9d0b9596 |
/*
 * One include/exclude rule collected from the command line
 * (--include, --include-path, --exclude, --exclude-path).
 */
struct inclusion_exclusion {
    /* Nonzero for an include rule, zero for an exclude rule. */
    int include;
    /* Pattern handed to fnmatch(). */
    const char *wildcard;
    /*
     * Nonzero: the pattern is matched against the whole pathname.
     * Zero: it is matched against the last path component only.
     */
    int path;
};
47 | |
70322ae3 |
48 | struct ctx { |
49 | triebuild *tb; |
50 | dev_t datafile_dev, filesystem_dev; |
51 | ino_t datafile_ino; |
52 | time_t last_output_update; |
8b1f55d6 |
53 | int progress, progwidth; |
9d0b9596 |
54 | struct inclusion_exclusion *inex; |
55 | int ninex; |
56 | int crossfs; |
70322ae3 |
57 | }; |
58 | |
59 | static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) |
60 | { |
61 | struct ctx *ctx = (struct ctx *)vctx; |
62 | struct trie_file file; |
63 | time_t t; |
9d0b9596 |
64 | int i, include; |
65 | const char *filename; |
70322ae3 |
66 | |
67 | /* |
68 | * Filter out our own data file. |
69 | */ |
70 | if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino) |
71 | return 0; |
72 | |
73 | /* |
74 | * Don't cross the streams^W^Wany file system boundary. |
70322ae3 |
75 | */ |
9d0b9596 |
76 | if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev) |
70322ae3 |
77 | return 0; |
78 | |
79 | /* |
9d0b9596 |
80 | * Filter based on wildcards. |
70322ae3 |
81 | */ |
9d0b9596 |
82 | include = 1; |
83 | filename = strrchr(pathname, '/'); |
84 | if (!filename) |
85 | filename = pathname; |
86 | else |
87 | filename++; |
88 | for (i = 0; i < ctx->ninex; i++) { |
89 | if (fnmatch(ctx->inex[i].wildcard, |
90 | ctx->inex[i].path ? pathname : filename, |
91 | FNM_PATHNAME) == 0) |
92 | include = ctx->inex[i].include; |
93 | } |
94 | if (!include) |
95 | return 1; /* filter, but don't prune */ |
70322ae3 |
96 | |
97 | file.blocks = st->st_blocks; |
98 | file.atime = st->st_atime; |
99 | triebuild_add(ctx->tb, pathname, &file); |
100 | |
101 | t = time(NULL); |
102 | if (t != ctx->last_output_update) { |
8b1f55d6 |
103 | if (ctx->progress) { |
104 | fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth, |
105 | pathname); |
106 | fflush(stderr); |
107 | } |
70322ae3 |
108 | ctx->last_output_update = t; |
109 | } |
110 | |
111 | return 1; |
112 | } |
113 | |
114 | static void run_query(const void *mappedfile, const char *rootdir, |
115 | time_t t, int depth) |
116 | { |
117 | size_t maxpathlen; |
118 | char *pathbuf; |
119 | unsigned long xi1, xi2; |
120 | unsigned long long s1, s2; |
121 | |
122 | maxpathlen = trie_maxpathlen(mappedfile); |
123 | pathbuf = snewn(maxpathlen + 1, char); |
124 | |
125 | /* |
126 | * We want to query everything between the supplied filename |
127 | * (inclusive) and that filename with a ^A on the end |
128 | * (exclusive). So find the x indices for each. |
129 | */ |
130 | sprintf(pathbuf, "%s\001", rootdir); |
131 | xi1 = trie_before(mappedfile, rootdir); |
132 | xi2 = trie_before(mappedfile, pathbuf); |
133 | |
134 | /* |
135 | * Now do the lookups in the age index. |
136 | */ |
137 | s1 = index_query(mappedfile, xi1, t); |
138 | s2 = index_query(mappedfile, xi2, t); |
139 | |
140 | /* Display in units of 2 512-byte blocks = 1Kb */ |
141 | printf("%-11llu %s\n", (s2 - s1) / 2, rootdir); |
142 | |
143 | if (depth > 0) { |
144 | /* |
145 | * Now scan for first-level subdirectories and report |
146 | * those too. |
147 | */ |
148 | xi1++; |
149 | while (xi1 < xi2) { |
150 | trie_getpath(mappedfile, xi1, pathbuf); |
151 | run_query(mappedfile, pathbuf, t, depth-1); |
152 | strcat(pathbuf, "\001"); |
153 | xi1 = trie_before(mappedfile, pathbuf); |
154 | } |
155 | } |
156 | } |
157 | |
158 | int main(int argc, char **argv) |
159 | { |
160 | int fd, count; |
161 | struct ctx actx, *ctx = &actx; |
162 | struct stat st; |
163 | off_t totalsize, realsize; |
164 | void *mappedfile; |
165 | triewalk *tw; |
166 | indexbuild *ib; |
167 | const struct trie_file *tf; |
168 | char *filename = "agedu.dat"; |
169 | char *rootdir = NULL; |
170 | int doing_opts = 1; |
171 | enum { QUERY, HTML, SCAN, DUMP, HTTPD } mode = QUERY; |
172 | char *minage = "0d"; |
812e4bf2 |
173 | int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; |
8b1f55d6 |
174 | int progress = 1; |
9d0b9596 |
175 | struct inclusion_exclusion *inex = NULL; |
176 | int ninex = 0, inexsize = 0; |
177 | int crossfs = 0; |
70322ae3 |
178 | |
179 | while (--argc > 0) { |
180 | char *p = *++argv; |
181 | char *optval; |
182 | |
183 | if (doing_opts && *p == '-') { |
184 | if (!strcmp(p, "--")) { |
185 | doing_opts = 0; |
186 | } else if (p[1] == '-') { |
187 | char *optval = strchr(p, '='); |
188 | if (optval) |
189 | *optval++ = '\0'; |
190 | if (!strcmp(p, "--help")) { |
191 | printf("FIXME: usage();\n"); |
192 | return 0; |
193 | } else if (!strcmp(p, "--version")) { |
194 | printf("FIXME: version();\n"); |
195 | return 0; |
196 | } else if (!strcmp(p, "--licence") || |
197 | !strcmp(p, "--license")) { |
198 | printf("FIXME: licence();\n"); |
199 | return 0; |
200 | } else if (!strcmp(p, "--scan")) { |
201 | mode = SCAN; |
202 | } else if (!strcmp(p, "--dump")) { |
203 | mode = DUMP; |
204 | } else if (!strcmp(p, "--html")) { |
205 | mode = HTML; |
206 | } else if (!strcmp(p, "--httpd") || |
207 | !strcmp(p, "--server")) { |
208 | mode = HTTPD; |
8b1f55d6 |
209 | } else if (!strcmp(p, "--progress") || |
210 | !strcmp(p, "--scan-progress")) { |
211 | progress = 2; |
212 | } else if (!strcmp(p, "--no-progress") || |
213 | !strcmp(p, "--no-scan-progress")) { |
214 | progress = 0; |
215 | } else if (!strcmp(p, "--tty-progress") || |
216 | !strcmp(p, "--tty-scan-progress") || |
217 | !strcmp(p, "--progress-tty") || |
218 | !strcmp(p, "--scan-progress-tty")) { |
219 | progress = 1; |
9d0b9596 |
220 | } else if (!strcmp(p, "--crossfs")) { |
221 | crossfs = 1; |
222 | } else if (!strcmp(p, "--no-crossfs")) { |
223 | crossfs = 0; |
70322ae3 |
224 | } else if (!strcmp(p, "--file") || |
812e4bf2 |
225 | !strcmp(p, "--auth") || |
226 | !strcmp(p, "--http-auth") || |
227 | !strcmp(p, "--httpd-auth") || |
228 | !strcmp(p, "--server-auth") || |
70322ae3 |
229 | !strcmp(p, "--minimum-age") || |
230 | !strcmp(p, "--min-age") || |
9d0b9596 |
231 | !strcmp(p, "--age") || |
232 | !strcmp(p, "--include") || |
233 | !strcmp(p, "--include-path") || |
234 | !strcmp(p, "--exclude") || |
235 | !strcmp(p, "--exclude-path")) { |
70322ae3 |
236 | /* |
237 | * Long options requiring values. |
238 | */ |
239 | if (!optval) { |
240 | if (--argc > 0) { |
241 | optval = *++argv; |
242 | } else { |
243 | fprintf(stderr, "%s: option '%s' requires" |
244 | " an argument\n", PNAME, p); |
245 | return 1; |
246 | } |
247 | } |
248 | if (!strcmp(p, "--file")) { |
249 | filename = optval; |
250 | } else if (!strcmp(p, "--minimum-age") || |
251 | !strcmp(p, "--min-age") || |
252 | !strcmp(p, "--age")) { |
253 | minage = optval; |
812e4bf2 |
254 | } else if (!strcmp(p, "--auth") || |
255 | !strcmp(p, "--http-auth") || |
256 | !strcmp(p, "--httpd-auth") || |
257 | !strcmp(p, "--server-auth")) { |
258 | if (!strcmp(optval, "magic")) |
259 | auth = HTTPD_AUTH_MAGIC; |
260 | else if (!strcmp(optval, "basic")) |
261 | auth = HTTPD_AUTH_BASIC; |
262 | else if (!strcmp(optval, "none")) |
263 | auth = HTTPD_AUTH_NONE; |
264 | else if (!strcmp(optval, "default")) |
265 | auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; |
266 | else { |
267 | fprintf(stderr, "%s: unrecognised authentication" |
268 | " type '%s'\n%*s options are 'magic'," |
269 | " 'basic', 'none', 'default'\n", |
270 | PNAME, optval, (int)strlen(PNAME), ""); |
271 | return 1; |
272 | } |
9d0b9596 |
273 | } else if (!strcmp(p, "--include") || |
274 | !strcmp(p, "--include-path") || |
275 | !strcmp(p, "--exclude") || |
276 | !strcmp(p, "--exclude-path")) { |
277 | if (ninex >= inexsize) { |
278 | inexsize = ninex * 3 / 2 + 16; |
279 | inex = sresize(inex, inexsize, |
280 | struct inclusion_exclusion); |
281 | } |
282 | inex[ninex].path = (!strcmp(p, "--include-path") || |
283 | !strcmp(p, "--exclude-path")); |
284 | inex[ninex].include = (!strcmp(p, "--include") || |
285 | !strcmp(p, "--include-path")); |
286 | inex[ninex].wildcard = optval; |
287 | ninex++; |
70322ae3 |
288 | } |
289 | } else { |
290 | fprintf(stderr, "%s: unrecognised option '%s'\n", |
291 | PNAME, p); |
292 | return 1; |
293 | } |
294 | } else { |
295 | p++; |
296 | while (*p) { |
297 | char c = *p++; |
298 | |
299 | switch (c) { |
300 | /* Options requiring arguments. */ |
301 | case 'f': |
302 | case 'a': |
303 | if (*p) { |
304 | optval = p; |
305 | p += strlen(p); |
306 | } else if (--argc > 0) { |
307 | optval = *++argv; |
308 | } else { |
309 | fprintf(stderr, "%s: option '-%c' requires" |
310 | " an argument\n", PNAME, c); |
311 | return 1; |
312 | } |
313 | switch (c) { |
314 | case 'f': /* data file name */ |
315 | filename = optval; |
316 | break; |
317 | case 'a': /* maximum age */ |
318 | minage = optval; |
319 | break; |
320 | } |
321 | break; |
322 | case 's': |
323 | mode = SCAN; |
324 | break; |
325 | default: |
326 | fprintf(stderr, "%s: unrecognised option '-%c'\n", |
327 | PNAME, c); |
328 | return 1; |
329 | } |
330 | } |
331 | } |
332 | } else { |
333 | if (!rootdir) { |
334 | rootdir = p; |
335 | } else { |
336 | fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p); |
337 | return 1; |
338 | } |
339 | } |
340 | } |
341 | |
342 | if (!rootdir) |
343 | rootdir = "."; |
344 | |
345 | if (mode == SCAN) { |
346 | |
347 | fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); |
348 | if (fd < 0) { |
349 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
350 | strerror(errno)); |
351 | return 1; |
352 | } |
353 | |
354 | if (stat(rootdir, &st) < 0) { |
355 | fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir, |
356 | strerror(errno)); |
357 | return 1; |
358 | } |
9d0b9596 |
359 | ctx->filesystem_dev = crossfs ? 0 : st.st_dev; |
70322ae3 |
360 | |
361 | if (fstat(fd, &st) < 0) { |
362 | perror("agedu: fstat"); |
363 | return 1; |
364 | } |
365 | ctx->datafile_dev = st.st_dev; |
366 | ctx->datafile_ino = st.st_ino; |
9d0b9596 |
367 | ctx->inex = inex; |
368 | ctx->ninex = ninex; |
369 | ctx->crossfs = crossfs; |
70322ae3 |
370 | |
371 | ctx->last_output_update = time(NULL); |
372 | |
8b1f55d6 |
373 | /* progress==1 means report progress only if stderr is a tty */ |
374 | if (progress == 1) |
375 | progress = isatty(2) ? 2 : 0; |
376 | ctx->progress = progress; |
377 | { |
378 | struct winsize ws; |
379 | if (progress && ioctl(2, TIOCGWINSZ, &ws) == 0) |
380 | ctx->progwidth = ws.ws_col - 1; |
381 | else |
382 | ctx->progwidth = 79; |
383 | } |
384 | |
70322ae3 |
385 | /* |
386 | * Scan the directory tree, and write out the trie component |
387 | * of the data file. |
388 | */ |
389 | ctx->tb = triebuild_new(fd); |
390 | du(rootdir, gotdata, ctx); |
391 | count = triebuild_finish(ctx->tb); |
392 | triebuild_free(ctx->tb); |
393 | |
8b1f55d6 |
394 | if (ctx->progress) { |
395 | fprintf(stderr, "%-*s\r", ctx->progwidth, ""); |
396 | fflush(stderr); |
397 | } |
70322ae3 |
398 | |
399 | /* |
400 | * Work out how much space the cumulative index trees will |
401 | * take; enlarge the file, and memory-map it. |
402 | */ |
403 | if (fstat(fd, &st) < 0) { |
404 | perror("agedu: fstat"); |
405 | return 1; |
406 | } |
407 | |
408 | printf("Built pathname index, %d entries, %ju bytes\n", count, |
409 | (intmax_t)st.st_size); |
410 | |
411 | totalsize = index_compute_size(st.st_size, count); |
412 | |
413 | if (lseek(fd, totalsize-1, SEEK_SET) < 0) { |
414 | perror("agedu: lseek"); |
415 | return 1; |
416 | } |
417 | if (write(fd, "\0", 1) < 1) { |
418 | perror("agedu: write"); |
419 | return 1; |
420 | } |
421 | |
422 | printf("Upper bound on index file size = %ju bytes\n", |
423 | (intmax_t)totalsize); |
424 | |
425 | mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); |
426 | if (!mappedfile) { |
427 | perror("agedu: mmap"); |
428 | return 1; |
429 | } |
430 | |
431 | ib = indexbuild_new(mappedfile, st.st_size, count); |
432 | tw = triewalk_new(mappedfile); |
433 | while ((tf = triewalk_next(tw, NULL)) != NULL) |
434 | indexbuild_add(ib, tf); |
435 | triewalk_free(tw); |
436 | realsize = indexbuild_realsize(ib); |
437 | indexbuild_free(ib); |
438 | |
439 | munmap(mappedfile, totalsize); |
440 | ftruncate(fd, realsize); |
441 | close(fd); |
442 | printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); |
443 | } else if (mode == QUERY) { |
444 | time_t t; |
445 | struct tm tm; |
446 | int nunits; |
447 | char unit[2]; |
448 | size_t pathlen; |
449 | |
450 | t = time(NULL); |
451 | |
452 | if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) { |
453 | fprintf(stderr, "%s: minimum age should be a number followed by" |
454 | " one of d,w,m,y\n", PNAME); |
455 | return 1; |
456 | } |
457 | |
458 | if (unit[0] == 'd') { |
459 | t -= 86400 * nunits; |
460 | } else if (unit[0] == 'w') { |
461 | t -= 86400 * 7 * nunits; |
462 | } else { |
463 | int ym; |
464 | |
465 | tm = *localtime(&t); |
466 | ym = tm.tm_year * 12 + tm.tm_mon; |
467 | |
468 | if (unit[0] == 'm') |
469 | ym -= nunits; |
470 | else |
471 | ym -= 12 * nunits; |
472 | |
473 | tm.tm_year = ym / 12; |
474 | tm.tm_mon = ym % 12; |
475 | |
476 | t = mktime(&tm); |
477 | } |
478 | |
479 | fd = open(filename, O_RDONLY); |
480 | if (fd < 0) { |
481 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
482 | strerror(errno)); |
483 | return 1; |
484 | } |
485 | if (fstat(fd, &st) < 0) { |
486 | perror("agedu: fstat"); |
487 | return 1; |
488 | } |
489 | totalsize = st.st_size; |
490 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
491 | if (!mappedfile) { |
492 | perror("agedu: mmap"); |
493 | return 1; |
494 | } |
495 | |
496 | /* |
497 | * Trim trailing slash, just in case. |
498 | */ |
499 | pathlen = strlen(rootdir); |
500 | if (pathlen > 0 && rootdir[pathlen-1] == '/') |
501 | rootdir[--pathlen] = '\0'; |
502 | |
503 | run_query(mappedfile, rootdir, t, 1); |
504 | } else if (mode == HTML) { |
505 | size_t pathlen; |
506 | unsigned long xi; |
507 | char *html; |
508 | |
509 | fd = open(filename, O_RDONLY); |
510 | if (fd < 0) { |
511 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
512 | strerror(errno)); |
513 | return 1; |
514 | } |
515 | if (fstat(fd, &st) < 0) { |
516 | perror("agedu: fstat"); |
517 | return 1; |
518 | } |
519 | totalsize = st.st_size; |
520 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
521 | if (!mappedfile) { |
522 | perror("agedu: mmap"); |
523 | return 1; |
524 | } |
525 | |
526 | /* |
527 | * Trim trailing slash, just in case. |
528 | */ |
529 | pathlen = strlen(rootdir); |
530 | if (pathlen > 0 && rootdir[pathlen-1] == '/') |
531 | rootdir[--pathlen] = '\0'; |
532 | |
533 | xi = trie_before(mappedfile, rootdir); |
534 | html = html_query(mappedfile, xi, NULL); |
535 | fputs(html, stdout); |
536 | } else if (mode == DUMP) { |
537 | size_t maxpathlen; |
538 | char *buf; |
539 | |
540 | fd = open(filename, O_RDONLY); |
541 | if (fd < 0) { |
542 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
543 | strerror(errno)); |
544 | return 1; |
545 | } |
546 | if (fstat(fd, &st) < 0) { |
547 | perror("agedu: fstat"); |
548 | return 1; |
549 | } |
550 | totalsize = st.st_size; |
551 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
552 | if (!mappedfile) { |
553 | perror("agedu: mmap"); |
554 | return 1; |
555 | } |
556 | |
557 | maxpathlen = trie_maxpathlen(mappedfile); |
558 | buf = snewn(maxpathlen, char); |
559 | |
560 | tw = triewalk_new(mappedfile); |
561 | while ((tf = triewalk_next(tw, buf)) != NULL) { |
562 | printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime); |
563 | } |
564 | triewalk_free(tw); |
565 | } else if (mode == HTTPD) { |
566 | fd = open(filename, O_RDONLY); |
567 | if (fd < 0) { |
568 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
569 | strerror(errno)); |
570 | return 1; |
571 | } |
572 | if (fstat(fd, &st) < 0) { |
573 | perror("agedu: fstat"); |
574 | return 1; |
575 | } |
576 | totalsize = st.st_size; |
577 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
578 | if (!mappedfile) { |
579 | perror("agedu: mmap"); |
580 | return 1; |
581 | } |
582 | |
812e4bf2 |
583 | run_httpd(mappedfile, auth); |
70322ae3 |
584 | } |
585 | |
586 | return 0; |
587 | } |