242865e4afefb9dcd633665a448bcf6eb7d59ed7
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #define _GNU_SOURCE
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <time.h>
13
14 #include <unistd.h>
15 #include <sys/types.h>
16 #include <fcntl.h>
17 #include <sys/mman.h>
18
19 #include "du.h"
20 #include "trie.h"
21 #include "index.h"
22 #include "malloc.h"
23 #include "html.h"
24 #include "httpd.h"
25
/* Program name, used as the prefix of diagnostic messages. */
#define PNAME "agedu"

/*
 * Report an unrecoverable error and terminate the program.
 *
 * Writes the program name followed by ": ", then the printf-style
 * message described by fmt and its arguments, then a newline, all to
 * stderr. Never returns: the process exits with status 1.
 */
void fatal(const char *fmt, ...)
{
    va_list args;

    fputs(PNAME, stderr);
    fputs(": ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
    exit(1);
}
38
39 struct ctx {
40 triebuild *tb;
41 dev_t datafile_dev, filesystem_dev;
42 ino_t datafile_ino;
43 time_t last_output_update;
44 };
45
46 static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
47 {
48 struct ctx *ctx = (struct ctx *)vctx;
49 struct trie_file file;
50 time_t t;
51
52 /*
53 * Filter out our own data file.
54 */
55 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
56 return 0;
57
58 /*
59 * Don't cross the streams^W^Wany file system boundary.
60 * (FIXME: this should be a configurable option.)
61 */
62 if (st->st_dev != ctx->filesystem_dev)
63 return 0;
64
65 /*
66 * FIXME: other filtering in gotdata will be needed, when we
67 * implement serious filtering.
68 */
69
70 file.blocks = st->st_blocks;
71 file.atime = st->st_atime;
72 triebuild_add(ctx->tb, pathname, &file);
73
74 t = time(NULL);
75 if (t != ctx->last_output_update) {
76 fprintf(stderr, "%-79.79s\r", pathname);
77 fflush(stderr);
78 ctx->last_output_update = t;
79 }
80
81 return 1;
82 }
83
84 static void run_query(const void *mappedfile, const char *rootdir,
85 time_t t, int depth)
86 {
87 size_t maxpathlen;
88 char *pathbuf;
89 unsigned long xi1, xi2;
90 unsigned long long s1, s2;
91
92 maxpathlen = trie_maxpathlen(mappedfile);
93 pathbuf = snewn(maxpathlen + 1, char);
94
95 /*
96 * We want to query everything between the supplied filename
97 * (inclusive) and that filename with a ^A on the end
98 * (exclusive). So find the x indices for each.
99 */
100 sprintf(pathbuf, "%s\001", rootdir);
101 xi1 = trie_before(mappedfile, rootdir);
102 xi2 = trie_before(mappedfile, pathbuf);
103
104 /*
105 * Now do the lookups in the age index.
106 */
107 s1 = index_query(mappedfile, xi1, t);
108 s2 = index_query(mappedfile, xi2, t);
109
110 /* Display in units of 2 512-byte blocks = 1Kb */
111 printf("%-11llu %s\n", (s2 - s1) / 2, rootdir);
112
113 if (depth > 0) {
114 /*
115 * Now scan for first-level subdirectories and report
116 * those too.
117 */
118 xi1++;
119 while (xi1 < xi2) {
120 trie_getpath(mappedfile, xi1, pathbuf);
121 run_query(mappedfile, pathbuf, t, depth-1);
122 strcat(pathbuf, "\001");
123 xi1 = trie_before(mappedfile, pathbuf);
124 }
125 }
126 }
127
128 int main(int argc, char **argv)
129 {
130 int fd, count;
131 struct ctx actx, *ctx = &actx;
132 struct stat st;
133 off_t totalsize, realsize;
134 void *mappedfile;
135 triewalk *tw;
136 indexbuild *ib;
137 const struct trie_file *tf;
138 char *filename = "agedu.dat";
139 char *rootdir = NULL;
140 int doing_opts = 1;
141 enum { QUERY, HTML, SCAN, DUMP, HTTPD } mode = QUERY;
142 char *minage = "0d";
143 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
144
145 while (--argc > 0) {
146 char *p = *++argv;
147 char *optval;
148
149 if (doing_opts && *p == '-') {
150 if (!strcmp(p, "--")) {
151 doing_opts = 0;
152 } else if (p[1] == '-') {
153 char *optval = strchr(p, '=');
154 if (optval)
155 *optval++ = '\0';
156 if (!strcmp(p, "--help")) {
157 printf("FIXME: usage();\n");
158 return 0;
159 } else if (!strcmp(p, "--version")) {
160 printf("FIXME: version();\n");
161 return 0;
162 } else if (!strcmp(p, "--licence") ||
163 !strcmp(p, "--license")) {
164 printf("FIXME: licence();\n");
165 return 0;
166 } else if (!strcmp(p, "--scan")) {
167 mode = SCAN;
168 } else if (!strcmp(p, "--dump")) {
169 mode = DUMP;
170 } else if (!strcmp(p, "--html")) {
171 mode = HTML;
172 } else if (!strcmp(p, "--httpd") ||
173 !strcmp(p, "--server")) {
174 mode = HTTPD;
175 } else if (!strcmp(p, "--file") ||
176 !strcmp(p, "--auth") ||
177 !strcmp(p, "--http-auth") ||
178 !strcmp(p, "--httpd-auth") ||
179 !strcmp(p, "--server-auth") ||
180 !strcmp(p, "--minimum-age") ||
181 !strcmp(p, "--min-age") ||
182 !strcmp(p, "--age")) {
183 /*
184 * Long options requiring values.
185 */
186 if (!optval) {
187 if (--argc > 0) {
188 optval = *++argv;
189 } else {
190 fprintf(stderr, "%s: option '%s' requires"
191 " an argument\n", PNAME, p);
192 return 1;
193 }
194 }
195 if (!strcmp(p, "--file")) {
196 filename = optval;
197 } else if (!strcmp(p, "--minimum-age") ||
198 !strcmp(p, "--min-age") ||
199 !strcmp(p, "--age")) {
200 minage = optval;
201 } else if (!strcmp(p, "--auth") ||
202 !strcmp(p, "--http-auth") ||
203 !strcmp(p, "--httpd-auth") ||
204 !strcmp(p, "--server-auth")) {
205 if (!strcmp(optval, "magic"))
206 auth = HTTPD_AUTH_MAGIC;
207 else if (!strcmp(optval, "basic"))
208 auth = HTTPD_AUTH_BASIC;
209 else if (!strcmp(optval, "none"))
210 auth = HTTPD_AUTH_NONE;
211 else if (!strcmp(optval, "default"))
212 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
213 else {
214 fprintf(stderr, "%s: unrecognised authentication"
215 " type '%s'\n%*s options are 'magic',"
216 " 'basic', 'none', 'default'\n",
217 PNAME, optval, (int)strlen(PNAME), "");
218 return 1;
219 }
220 }
221 } else {
222 fprintf(stderr, "%s: unrecognised option '%s'\n",
223 PNAME, p);
224 return 1;
225 }
226 } else {
227 p++;
228 while (*p) {
229 char c = *p++;
230
231 switch (c) {
232 /* Options requiring arguments. */
233 case 'f':
234 case 'a':
235 if (*p) {
236 optval = p;
237 p += strlen(p);
238 } else if (--argc > 0) {
239 optval = *++argv;
240 } else {
241 fprintf(stderr, "%s: option '-%c' requires"
242 " an argument\n", PNAME, c);
243 return 1;
244 }
245 switch (c) {
246 case 'f': /* data file name */
247 filename = optval;
248 break;
249 case 'a': /* maximum age */
250 minage = optval;
251 break;
252 }
253 break;
254 case 's':
255 mode = SCAN;
256 break;
257 default:
258 fprintf(stderr, "%s: unrecognised option '-%c'\n",
259 PNAME, c);
260 return 1;
261 }
262 }
263 }
264 } else {
265 if (!rootdir) {
266 rootdir = p;
267 } else {
268 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
269 return 1;
270 }
271 }
272 }
273
274 if (!rootdir)
275 rootdir = ".";
276
277 if (mode == SCAN) {
278
279 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
280 if (fd < 0) {
281 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
282 strerror(errno));
283 return 1;
284 }
285
286 if (stat(rootdir, &st) < 0) {
287 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir,
288 strerror(errno));
289 return 1;
290 }
291 ctx->filesystem_dev = st.st_dev;
292
293 if (fstat(fd, &st) < 0) {
294 perror("agedu: fstat");
295 return 1;
296 }
297 ctx->datafile_dev = st.st_dev;
298 ctx->datafile_ino = st.st_ino;
299
300 ctx->last_output_update = time(NULL);
301
302 /*
303 * Scan the directory tree, and write out the trie component
304 * of the data file.
305 */
306 ctx->tb = triebuild_new(fd);
307 du(rootdir, gotdata, ctx);
308 count = triebuild_finish(ctx->tb);
309 triebuild_free(ctx->tb);
310
311 fprintf(stderr, "%-79s\r", "");
312 fflush(stderr);
313
314 /*
315 * Work out how much space the cumulative index trees will
316 * take; enlarge the file, and memory-map it.
317 */
318 if (fstat(fd, &st) < 0) {
319 perror("agedu: fstat");
320 return 1;
321 }
322
323 printf("Built pathname index, %d entries, %ju bytes\n", count,
324 (intmax_t)st.st_size);
325
326 totalsize = index_compute_size(st.st_size, count);
327
328 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
329 perror("agedu: lseek");
330 return 1;
331 }
332 if (write(fd, "\0", 1) < 1) {
333 perror("agedu: write");
334 return 1;
335 }
336
337 printf("Upper bound on index file size = %ju bytes\n",
338 (intmax_t)totalsize);
339
340 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
341 if (!mappedfile) {
342 perror("agedu: mmap");
343 return 1;
344 }
345
346 ib = indexbuild_new(mappedfile, st.st_size, count);
347 tw = triewalk_new(mappedfile);
348 while ((tf = triewalk_next(tw, NULL)) != NULL)
349 indexbuild_add(ib, tf);
350 triewalk_free(tw);
351 realsize = indexbuild_realsize(ib);
352 indexbuild_free(ib);
353
354 munmap(mappedfile, totalsize);
355 ftruncate(fd, realsize);
356 close(fd);
357 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
358 } else if (mode == QUERY) {
359 time_t t;
360 struct tm tm;
361 int nunits;
362 char unit[2];
363 size_t pathlen;
364
365 t = time(NULL);
366
367 if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) {
368 fprintf(stderr, "%s: minimum age should be a number followed by"
369 " one of d,w,m,y\n", PNAME);
370 return 1;
371 }
372
373 if (unit[0] == 'd') {
374 t -= 86400 * nunits;
375 } else if (unit[0] == 'w') {
376 t -= 86400 * 7 * nunits;
377 } else {
378 int ym;
379
380 tm = *localtime(&t);
381 ym = tm.tm_year * 12 + tm.tm_mon;
382
383 if (unit[0] == 'm')
384 ym -= nunits;
385 else
386 ym -= 12 * nunits;
387
388 tm.tm_year = ym / 12;
389 tm.tm_mon = ym % 12;
390
391 t = mktime(&tm);
392 }
393
394 fd = open(filename, O_RDONLY);
395 if (fd < 0) {
396 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
397 strerror(errno));
398 return 1;
399 }
400 if (fstat(fd, &st) < 0) {
401 perror("agedu: fstat");
402 return 1;
403 }
404 totalsize = st.st_size;
405 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
406 if (!mappedfile) {
407 perror("agedu: mmap");
408 return 1;
409 }
410
411 /*
412 * Trim trailing slash, just in case.
413 */
414 pathlen = strlen(rootdir);
415 if (pathlen > 0 && rootdir[pathlen-1] == '/')
416 rootdir[--pathlen] = '\0';
417
418 run_query(mappedfile, rootdir, t, 1);
419 } else if (mode == HTML) {
420 size_t pathlen;
421 unsigned long xi;
422 char *html;
423
424 fd = open(filename, O_RDONLY);
425 if (fd < 0) {
426 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
427 strerror(errno));
428 return 1;
429 }
430 if (fstat(fd, &st) < 0) {
431 perror("agedu: fstat");
432 return 1;
433 }
434 totalsize = st.st_size;
435 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
436 if (!mappedfile) {
437 perror("agedu: mmap");
438 return 1;
439 }
440
441 /*
442 * Trim trailing slash, just in case.
443 */
444 pathlen = strlen(rootdir);
445 if (pathlen > 0 && rootdir[pathlen-1] == '/')
446 rootdir[--pathlen] = '\0';
447
448 xi = trie_before(mappedfile, rootdir);
449 html = html_query(mappedfile, xi, NULL);
450 fputs(html, stdout);
451 } else if (mode == DUMP) {
452 size_t maxpathlen;
453 char *buf;
454
455 fd = open(filename, O_RDONLY);
456 if (fd < 0) {
457 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
458 strerror(errno));
459 return 1;
460 }
461 if (fstat(fd, &st) < 0) {
462 perror("agedu: fstat");
463 return 1;
464 }
465 totalsize = st.st_size;
466 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
467 if (!mappedfile) {
468 perror("agedu: mmap");
469 return 1;
470 }
471
472 maxpathlen = trie_maxpathlen(mappedfile);
473 buf = snewn(maxpathlen, char);
474
475 tw = triewalk_new(mappedfile);
476 while ((tf = triewalk_next(tw, buf)) != NULL) {
477 printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime);
478 }
479 triewalk_free(tw);
480 } else if (mode == HTTPD) {
481 fd = open(filename, O_RDONLY);
482 if (fd < 0) {
483 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
484 strerror(errno));
485 return 1;
486 }
487 if (fstat(fd, &st) < 0) {
488 perror("agedu: fstat");
489 return 1;
490 }
491 totalsize = st.st_size;
492 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
493 if (!mappedfile) {
494 perror("agedu: mmap");
495 return 1;
496 }
497
498 run_httpd(mappedfile, auth);
499 }
500
501 return 0;
502 }