Initial commit of a basically working but severely unpolished
[sgt/agedu] / agedu.c
1 /*
2 * Main program for agedu.
3 */
4
5 #define _GNU_SOURCE
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <time.h>
13
14 #include <unistd.h>
15 #include <sys/types.h>
16 #include <fcntl.h>
17 #include <sys/mman.h>
18
19 #include "du.h"
20 #include "trie.h"
21 #include "index.h"
22 #include "malloc.h"
23 #include "html.h"
24 #include "httpd.h"
25
/* Program name, used as the prefix of diagnostic messages. */
#define PNAME "agedu"

/*
 * Report an unrecoverable error and terminate the program.
 *
 * Writes "agedu: ", then the printf-style message built from `fmt' and
 * the following arguments, then a newline, all to stderr. Never
 * returns: the process exits with status 1.
 */
void fatal(const char *fmt, ...)
{
    va_list args;

    fputs(PNAME ": ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
    exit(1);
}
38
39 struct ctx {
40 triebuild *tb;
41 dev_t datafile_dev, filesystem_dev;
42 ino_t datafile_ino;
43 time_t last_output_update;
44 };
45
46 static int gotdata(void *vctx, const char *pathname, const struct stat64 *st)
47 {
48 struct ctx *ctx = (struct ctx *)vctx;
49 struct trie_file file;
50 time_t t;
51
52 /*
53 * Filter out our own data file.
54 */
55 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
56 return 0;
57
58 /*
59 * Don't cross the streams^W^Wany file system boundary.
60 * (FIXME: this should be a configurable option.)
61 */
62 if (st->st_dev != ctx->filesystem_dev)
63 return 0;
64
65 /*
66 * FIXME: other filtering in gotdata will be needed, when we
67 * implement serious filtering.
68 */
69
70 file.blocks = st->st_blocks;
71 file.atime = st->st_atime;
72 triebuild_add(ctx->tb, pathname, &file);
73
74 t = time(NULL);
75 if (t != ctx->last_output_update) {
76 fprintf(stderr, "%-79.79s\r", pathname);
77 fflush(stderr);
78 ctx->last_output_update = t;
79 }
80
81 return 1;
82 }
83
84 static void run_query(const void *mappedfile, const char *rootdir,
85 time_t t, int depth)
86 {
87 size_t maxpathlen;
88 char *pathbuf;
89 unsigned long xi1, xi2;
90 unsigned long long s1, s2;
91
92 maxpathlen = trie_maxpathlen(mappedfile);
93 pathbuf = snewn(maxpathlen + 1, char);
94
95 /*
96 * We want to query everything between the supplied filename
97 * (inclusive) and that filename with a ^A on the end
98 * (exclusive). So find the x indices for each.
99 */
100 sprintf(pathbuf, "%s\001", rootdir);
101 xi1 = trie_before(mappedfile, rootdir);
102 xi2 = trie_before(mappedfile, pathbuf);
103
104 /*
105 * Now do the lookups in the age index.
106 */
107 s1 = index_query(mappedfile, xi1, t);
108 s2 = index_query(mappedfile, xi2, t);
109
110 /* Display in units of 2 512-byte blocks = 1Kb */
111 printf("%-11llu %s\n", (s2 - s1) / 2, rootdir);
112
113 if (depth > 0) {
114 /*
115 * Now scan for first-level subdirectories and report
116 * those too.
117 */
118 xi1++;
119 while (xi1 < xi2) {
120 trie_getpath(mappedfile, xi1, pathbuf);
121 run_query(mappedfile, pathbuf, t, depth-1);
122 strcat(pathbuf, "\001");
123 xi1 = trie_before(mappedfile, pathbuf);
124 }
125 }
126 }
127
128 int main(int argc, char **argv)
129 {
130 int fd, count;
131 struct ctx actx, *ctx = &actx;
132 struct stat st;
133 off_t totalsize, realsize;
134 void *mappedfile;
135 triewalk *tw;
136 indexbuild *ib;
137 const struct trie_file *tf;
138 char *filename = "agedu.dat";
139 char *rootdir = NULL;
140 int doing_opts = 1;
141 enum { QUERY, HTML, SCAN, DUMP, HTTPD } mode = QUERY;
142 char *minage = "0d";
143
144 while (--argc > 0) {
145 char *p = *++argv;
146 char *optval;
147
148 if (doing_opts && *p == '-') {
149 if (!strcmp(p, "--")) {
150 doing_opts = 0;
151 } else if (p[1] == '-') {
152 char *optval = strchr(p, '=');
153 if (optval)
154 *optval++ = '\0';
155 if (!strcmp(p, "--help")) {
156 printf("FIXME: usage();\n");
157 return 0;
158 } else if (!strcmp(p, "--version")) {
159 printf("FIXME: version();\n");
160 return 0;
161 } else if (!strcmp(p, "--licence") ||
162 !strcmp(p, "--license")) {
163 printf("FIXME: licence();\n");
164 return 0;
165 } else if (!strcmp(p, "--scan")) {
166 mode = SCAN;
167 } else if (!strcmp(p, "--dump")) {
168 mode = DUMP;
169 } else if (!strcmp(p, "--html")) {
170 mode = HTML;
171 } else if (!strcmp(p, "--httpd") ||
172 !strcmp(p, "--server")) {
173 mode = HTTPD;
174 } else if (!strcmp(p, "--file") ||
175 !strcmp(p, "--minimum-age") ||
176 !strcmp(p, "--min-age") ||
177 !strcmp(p, "--age")) {
178 /*
179 * Long options requiring values.
180 */
181 if (!optval) {
182 if (--argc > 0) {
183 optval = *++argv;
184 } else {
185 fprintf(stderr, "%s: option '%s' requires"
186 " an argument\n", PNAME, p);
187 return 1;
188 }
189 }
190 if (!strcmp(p, "--file")) {
191 filename = optval;
192 } else if (!strcmp(p, "--minimum-age") ||
193 !strcmp(p, "--min-age") ||
194 !strcmp(p, "--age")) {
195 minage = optval;
196 }
197 } else {
198 fprintf(stderr, "%s: unrecognised option '%s'\n",
199 PNAME, p);
200 return 1;
201 }
202 } else {
203 p++;
204 while (*p) {
205 char c = *p++;
206
207 switch (c) {
208 /* Options requiring arguments. */
209 case 'f':
210 case 'a':
211 if (*p) {
212 optval = p;
213 p += strlen(p);
214 } else if (--argc > 0) {
215 optval = *++argv;
216 } else {
217 fprintf(stderr, "%s: option '-%c' requires"
218 " an argument\n", PNAME, c);
219 return 1;
220 }
221 switch (c) {
222 case 'f': /* data file name */
223 filename = optval;
224 break;
225 case 'a': /* maximum age */
226 minage = optval;
227 break;
228 }
229 break;
230 case 's':
231 mode = SCAN;
232 break;
233 default:
234 fprintf(stderr, "%s: unrecognised option '-%c'\n",
235 PNAME, c);
236 return 1;
237 }
238 }
239 }
240 } else {
241 if (!rootdir) {
242 rootdir = p;
243 } else {
244 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
245 return 1;
246 }
247 }
248 }
249
250 if (!rootdir)
251 rootdir = ".";
252
253 if (mode == SCAN) {
254
255 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU);
256 if (fd < 0) {
257 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
258 strerror(errno));
259 return 1;
260 }
261
262 if (stat(rootdir, &st) < 0) {
263 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir,
264 strerror(errno));
265 return 1;
266 }
267 ctx->filesystem_dev = st.st_dev;
268
269 if (fstat(fd, &st) < 0) {
270 perror("agedu: fstat");
271 return 1;
272 }
273 ctx->datafile_dev = st.st_dev;
274 ctx->datafile_ino = st.st_ino;
275
276 ctx->last_output_update = time(NULL);
277
278 /*
279 * Scan the directory tree, and write out the trie component
280 * of the data file.
281 */
282 ctx->tb = triebuild_new(fd);
283 du(rootdir, gotdata, ctx);
284 count = triebuild_finish(ctx->tb);
285 triebuild_free(ctx->tb);
286
287 fprintf(stderr, "%-79s\r", "");
288 fflush(stderr);
289
290 /*
291 * Work out how much space the cumulative index trees will
292 * take; enlarge the file, and memory-map it.
293 */
294 if (fstat(fd, &st) < 0) {
295 perror("agedu: fstat");
296 return 1;
297 }
298
299 printf("Built pathname index, %d entries, %ju bytes\n", count,
300 (intmax_t)st.st_size);
301
302 totalsize = index_compute_size(st.st_size, count);
303
304 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
305 perror("agedu: lseek");
306 return 1;
307 }
308 if (write(fd, "\0", 1) < 1) {
309 perror("agedu: write");
310 return 1;
311 }
312
313 printf("Upper bound on index file size = %ju bytes\n",
314 (intmax_t)totalsize);
315
316 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
317 if (!mappedfile) {
318 perror("agedu: mmap");
319 return 1;
320 }
321
322 ib = indexbuild_new(mappedfile, st.st_size, count);
323 tw = triewalk_new(mappedfile);
324 while ((tf = triewalk_next(tw, NULL)) != NULL)
325 indexbuild_add(ib, tf);
326 triewalk_free(tw);
327 realsize = indexbuild_realsize(ib);
328 indexbuild_free(ib);
329
330 munmap(mappedfile, totalsize);
331 ftruncate(fd, realsize);
332 close(fd);
333 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize);
334 } else if (mode == QUERY) {
335 time_t t;
336 struct tm tm;
337 int nunits;
338 char unit[2];
339 size_t pathlen;
340
341 t = time(NULL);
342
343 if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) {
344 fprintf(stderr, "%s: minimum age should be a number followed by"
345 " one of d,w,m,y\n", PNAME);
346 return 1;
347 }
348
349 if (unit[0] == 'd') {
350 t -= 86400 * nunits;
351 } else if (unit[0] == 'w') {
352 t -= 86400 * 7 * nunits;
353 } else {
354 int ym;
355
356 tm = *localtime(&t);
357 ym = tm.tm_year * 12 + tm.tm_mon;
358
359 if (unit[0] == 'm')
360 ym -= nunits;
361 else
362 ym -= 12 * nunits;
363
364 tm.tm_year = ym / 12;
365 tm.tm_mon = ym % 12;
366
367 t = mktime(&tm);
368 }
369
370 fd = open(filename, O_RDONLY);
371 if (fd < 0) {
372 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
373 strerror(errno));
374 return 1;
375 }
376 if (fstat(fd, &st) < 0) {
377 perror("agedu: fstat");
378 return 1;
379 }
380 totalsize = st.st_size;
381 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
382 if (!mappedfile) {
383 perror("agedu: mmap");
384 return 1;
385 }
386
387 /*
388 * Trim trailing slash, just in case.
389 */
390 pathlen = strlen(rootdir);
391 if (pathlen > 0 && rootdir[pathlen-1] == '/')
392 rootdir[--pathlen] = '\0';
393
394 run_query(mappedfile, rootdir, t, 1);
395 } else if (mode == HTML) {
396 size_t pathlen;
397 unsigned long xi;
398 char *html;
399
400 fd = open(filename, O_RDONLY);
401 if (fd < 0) {
402 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
403 strerror(errno));
404 return 1;
405 }
406 if (fstat(fd, &st) < 0) {
407 perror("agedu: fstat");
408 return 1;
409 }
410 totalsize = st.st_size;
411 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
412 if (!mappedfile) {
413 perror("agedu: mmap");
414 return 1;
415 }
416
417 /*
418 * Trim trailing slash, just in case.
419 */
420 pathlen = strlen(rootdir);
421 if (pathlen > 0 && rootdir[pathlen-1] == '/')
422 rootdir[--pathlen] = '\0';
423
424 xi = trie_before(mappedfile, rootdir);
425 html = html_query(mappedfile, xi, NULL);
426 fputs(html, stdout);
427 } else if (mode == DUMP) {
428 size_t maxpathlen;
429 char *buf;
430
431 fd = open(filename, O_RDONLY);
432 if (fd < 0) {
433 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
434 strerror(errno));
435 return 1;
436 }
437 if (fstat(fd, &st) < 0) {
438 perror("agedu: fstat");
439 return 1;
440 }
441 totalsize = st.st_size;
442 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
443 if (!mappedfile) {
444 perror("agedu: mmap");
445 return 1;
446 }
447
448 maxpathlen = trie_maxpathlen(mappedfile);
449 buf = snewn(maxpathlen, char);
450
451 tw = triewalk_new(mappedfile);
452 while ((tf = triewalk_next(tw, buf)) != NULL) {
453 printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime);
454 }
455 triewalk_free(tw);
456 } else if (mode == HTTPD) {
457 fd = open(filename, O_RDONLY);
458 if (fd < 0) {
459 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
460 strerror(errno));
461 return 1;
462 }
463 if (fstat(fd, &st) < 0) {
464 perror("agedu: fstat");
465 return 1;
466 }
467 totalsize = st.st_size;
468 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
469 if (!mappedfile) {
470 perror("agedu: mmap");
471 return 1;
472 }
473
474 run_httpd(mappedfile);
475 }
476
477 return 0;
478 }