70322ae3 |
1 | /* |
2 | * Main program for agedu. |
3 | */ |
4 | |
5 | #define _GNU_SOURCE |
6 | #include <stdio.h> |
7 | #include <errno.h> |
8 | #include <stdarg.h> |
9 | #include <stdlib.h> |
10 | #include <stdint.h> |
11 | #include <string.h> |
12 | #include <time.h> |
13 | |
14 | #include <unistd.h> |
15 | #include <sys/types.h> |
16 | #include <fcntl.h> |
17 | #include <sys/mman.h> |
18 | |
19 | #include "du.h" |
20 | #include "trie.h" |
21 | #include "index.h" |
22 | #include "malloc.h" |
23 | #include "html.h" |
24 | #include "httpd.h" |
25 | |
26 | #define PNAME "agedu" |
27 | |
28 | void fatal(const char *fmt, ...) |
29 | { |
30 | va_list ap; |
31 | fprintf(stderr, "%s: ", PNAME); |
32 | va_start(ap, fmt); |
33 | vfprintf(stderr, fmt, ap); |
34 | va_end(ap); |
35 | fprintf(stderr, "\n"); |
36 | exit(1); |
37 | } |
38 | |
39 | struct ctx { |
40 | triebuild *tb; |
41 | dev_t datafile_dev, filesystem_dev; |
42 | ino_t datafile_ino; |
43 | time_t last_output_update; |
44 | }; |
45 | |
46 | static int gotdata(void *vctx, const char *pathname, const struct stat64 *st) |
47 | { |
48 | struct ctx *ctx = (struct ctx *)vctx; |
49 | struct trie_file file; |
50 | time_t t; |
51 | |
52 | /* |
53 | * Filter out our own data file. |
54 | */ |
55 | if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino) |
56 | return 0; |
57 | |
58 | /* |
59 | * Don't cross the streams^W^Wany file system boundary. |
60 | * (FIXME: this should be a configurable option.) |
61 | */ |
62 | if (st->st_dev != ctx->filesystem_dev) |
63 | return 0; |
64 | |
65 | /* |
66 | * FIXME: other filtering in gotdata will be needed, when we |
67 | * implement serious filtering. |
68 | */ |
69 | |
70 | file.blocks = st->st_blocks; |
71 | file.atime = st->st_atime; |
72 | triebuild_add(ctx->tb, pathname, &file); |
73 | |
74 | t = time(NULL); |
75 | if (t != ctx->last_output_update) { |
76 | fprintf(stderr, "%-79.79s\r", pathname); |
77 | fflush(stderr); |
78 | ctx->last_output_update = t; |
79 | } |
80 | |
81 | return 1; |
82 | } |
83 | |
84 | static void run_query(const void *mappedfile, const char *rootdir, |
85 | time_t t, int depth) |
86 | { |
87 | size_t maxpathlen; |
88 | char *pathbuf; |
89 | unsigned long xi1, xi2; |
90 | unsigned long long s1, s2; |
91 | |
92 | maxpathlen = trie_maxpathlen(mappedfile); |
93 | pathbuf = snewn(maxpathlen + 1, char); |
94 | |
95 | /* |
96 | * We want to query everything between the supplied filename |
97 | * (inclusive) and that filename with a ^A on the end |
98 | * (exclusive). So find the x indices for each. |
99 | */ |
100 | sprintf(pathbuf, "%s\001", rootdir); |
101 | xi1 = trie_before(mappedfile, rootdir); |
102 | xi2 = trie_before(mappedfile, pathbuf); |
103 | |
104 | /* |
105 | * Now do the lookups in the age index. |
106 | */ |
107 | s1 = index_query(mappedfile, xi1, t); |
108 | s2 = index_query(mappedfile, xi2, t); |
109 | |
110 | /* Display in units of 2 512-byte blocks = 1Kb */ |
111 | printf("%-11llu %s\n", (s2 - s1) / 2, rootdir); |
112 | |
113 | if (depth > 0) { |
114 | /* |
115 | * Now scan for first-level subdirectories and report |
116 | * those too. |
117 | */ |
118 | xi1++; |
119 | while (xi1 < xi2) { |
120 | trie_getpath(mappedfile, xi1, pathbuf); |
121 | run_query(mappedfile, pathbuf, t, depth-1); |
122 | strcat(pathbuf, "\001"); |
123 | xi1 = trie_before(mappedfile, pathbuf); |
124 | } |
125 | } |
126 | } |
127 | |
128 | int main(int argc, char **argv) |
129 | { |
130 | int fd, count; |
131 | struct ctx actx, *ctx = &actx; |
132 | struct stat st; |
133 | off_t totalsize, realsize; |
134 | void *mappedfile; |
135 | triewalk *tw; |
136 | indexbuild *ib; |
137 | const struct trie_file *tf; |
138 | char *filename = "agedu.dat"; |
139 | char *rootdir = NULL; |
140 | int doing_opts = 1; |
141 | enum { QUERY, HTML, SCAN, DUMP, HTTPD } mode = QUERY; |
142 | char *minage = "0d"; |
143 | |
144 | while (--argc > 0) { |
145 | char *p = *++argv; |
146 | char *optval; |
147 | |
148 | if (doing_opts && *p == '-') { |
149 | if (!strcmp(p, "--")) { |
150 | doing_opts = 0; |
151 | } else if (p[1] == '-') { |
152 | char *optval = strchr(p, '='); |
153 | if (optval) |
154 | *optval++ = '\0'; |
155 | if (!strcmp(p, "--help")) { |
156 | printf("FIXME: usage();\n"); |
157 | return 0; |
158 | } else if (!strcmp(p, "--version")) { |
159 | printf("FIXME: version();\n"); |
160 | return 0; |
161 | } else if (!strcmp(p, "--licence") || |
162 | !strcmp(p, "--license")) { |
163 | printf("FIXME: licence();\n"); |
164 | return 0; |
165 | } else if (!strcmp(p, "--scan")) { |
166 | mode = SCAN; |
167 | } else if (!strcmp(p, "--dump")) { |
168 | mode = DUMP; |
169 | } else if (!strcmp(p, "--html")) { |
170 | mode = HTML; |
171 | } else if (!strcmp(p, "--httpd") || |
172 | !strcmp(p, "--server")) { |
173 | mode = HTTPD; |
174 | } else if (!strcmp(p, "--file") || |
175 | !strcmp(p, "--minimum-age") || |
176 | !strcmp(p, "--min-age") || |
177 | !strcmp(p, "--age")) { |
178 | /* |
179 | * Long options requiring values. |
180 | */ |
181 | if (!optval) { |
182 | if (--argc > 0) { |
183 | optval = *++argv; |
184 | } else { |
185 | fprintf(stderr, "%s: option '%s' requires" |
186 | " an argument\n", PNAME, p); |
187 | return 1; |
188 | } |
189 | } |
190 | if (!strcmp(p, "--file")) { |
191 | filename = optval; |
192 | } else if (!strcmp(p, "--minimum-age") || |
193 | !strcmp(p, "--min-age") || |
194 | !strcmp(p, "--age")) { |
195 | minage = optval; |
196 | } |
197 | } else { |
198 | fprintf(stderr, "%s: unrecognised option '%s'\n", |
199 | PNAME, p); |
200 | return 1; |
201 | } |
202 | } else { |
203 | p++; |
204 | while (*p) { |
205 | char c = *p++; |
206 | |
207 | switch (c) { |
208 | /* Options requiring arguments. */ |
209 | case 'f': |
210 | case 'a': |
211 | if (*p) { |
212 | optval = p; |
213 | p += strlen(p); |
214 | } else if (--argc > 0) { |
215 | optval = *++argv; |
216 | } else { |
217 | fprintf(stderr, "%s: option '-%c' requires" |
218 | " an argument\n", PNAME, c); |
219 | return 1; |
220 | } |
221 | switch (c) { |
222 | case 'f': /* data file name */ |
223 | filename = optval; |
224 | break; |
225 | case 'a': /* maximum age */ |
226 | minage = optval; |
227 | break; |
228 | } |
229 | break; |
230 | case 's': |
231 | mode = SCAN; |
232 | break; |
233 | default: |
234 | fprintf(stderr, "%s: unrecognised option '-%c'\n", |
235 | PNAME, c); |
236 | return 1; |
237 | } |
238 | } |
239 | } |
240 | } else { |
241 | if (!rootdir) { |
242 | rootdir = p; |
243 | } else { |
244 | fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p); |
245 | return 1; |
246 | } |
247 | } |
248 | } |
249 | |
250 | if (!rootdir) |
251 | rootdir = "."; |
252 | |
253 | if (mode == SCAN) { |
254 | |
255 | fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IRWXU); |
256 | if (fd < 0) { |
257 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
258 | strerror(errno)); |
259 | return 1; |
260 | } |
261 | |
262 | if (stat(rootdir, &st) < 0) { |
263 | fprintf(stderr, "%s: %s: stat: %s\n", PNAME, rootdir, |
264 | strerror(errno)); |
265 | return 1; |
266 | } |
267 | ctx->filesystem_dev = st.st_dev; |
268 | |
269 | if (fstat(fd, &st) < 0) { |
270 | perror("agedu: fstat"); |
271 | return 1; |
272 | } |
273 | ctx->datafile_dev = st.st_dev; |
274 | ctx->datafile_ino = st.st_ino; |
275 | |
276 | ctx->last_output_update = time(NULL); |
277 | |
278 | /* |
279 | * Scan the directory tree, and write out the trie component |
280 | * of the data file. |
281 | */ |
282 | ctx->tb = triebuild_new(fd); |
283 | du(rootdir, gotdata, ctx); |
284 | count = triebuild_finish(ctx->tb); |
285 | triebuild_free(ctx->tb); |
286 | |
287 | fprintf(stderr, "%-79s\r", ""); |
288 | fflush(stderr); |
289 | |
290 | /* |
291 | * Work out how much space the cumulative index trees will |
292 | * take; enlarge the file, and memory-map it. |
293 | */ |
294 | if (fstat(fd, &st) < 0) { |
295 | perror("agedu: fstat"); |
296 | return 1; |
297 | } |
298 | |
299 | printf("Built pathname index, %d entries, %ju bytes\n", count, |
300 | (intmax_t)st.st_size); |
301 | |
302 | totalsize = index_compute_size(st.st_size, count); |
303 | |
304 | if (lseek(fd, totalsize-1, SEEK_SET) < 0) { |
305 | perror("agedu: lseek"); |
306 | return 1; |
307 | } |
308 | if (write(fd, "\0", 1) < 1) { |
309 | perror("agedu: write"); |
310 | return 1; |
311 | } |
312 | |
313 | printf("Upper bound on index file size = %ju bytes\n", |
314 | (intmax_t)totalsize); |
315 | |
316 | mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); |
317 | if (!mappedfile) { |
318 | perror("agedu: mmap"); |
319 | return 1; |
320 | } |
321 | |
322 | ib = indexbuild_new(mappedfile, st.st_size, count); |
323 | tw = triewalk_new(mappedfile); |
324 | while ((tf = triewalk_next(tw, NULL)) != NULL) |
325 | indexbuild_add(ib, tf); |
326 | triewalk_free(tw); |
327 | realsize = indexbuild_realsize(ib); |
328 | indexbuild_free(ib); |
329 | |
330 | munmap(mappedfile, totalsize); |
331 | ftruncate(fd, realsize); |
332 | close(fd); |
333 | printf("Actual index file size = %ju bytes\n", (intmax_t)realsize); |
334 | } else if (mode == QUERY) { |
335 | time_t t; |
336 | struct tm tm; |
337 | int nunits; |
338 | char unit[2]; |
339 | size_t pathlen; |
340 | |
341 | t = time(NULL); |
342 | |
343 | if (2 != sscanf(minage, "%d%1[DdWwMmYy]", &nunits, unit)) { |
344 | fprintf(stderr, "%s: minimum age should be a number followed by" |
345 | " one of d,w,m,y\n", PNAME); |
346 | return 1; |
347 | } |
348 | |
349 | if (unit[0] == 'd') { |
350 | t -= 86400 * nunits; |
351 | } else if (unit[0] == 'w') { |
352 | t -= 86400 * 7 * nunits; |
353 | } else { |
354 | int ym; |
355 | |
356 | tm = *localtime(&t); |
357 | ym = tm.tm_year * 12 + tm.tm_mon; |
358 | |
359 | if (unit[0] == 'm') |
360 | ym -= nunits; |
361 | else |
362 | ym -= 12 * nunits; |
363 | |
364 | tm.tm_year = ym / 12; |
365 | tm.tm_mon = ym % 12; |
366 | |
367 | t = mktime(&tm); |
368 | } |
369 | |
370 | fd = open(filename, O_RDONLY); |
371 | if (fd < 0) { |
372 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
373 | strerror(errno)); |
374 | return 1; |
375 | } |
376 | if (fstat(fd, &st) < 0) { |
377 | perror("agedu: fstat"); |
378 | return 1; |
379 | } |
380 | totalsize = st.st_size; |
381 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
382 | if (!mappedfile) { |
383 | perror("agedu: mmap"); |
384 | return 1; |
385 | } |
386 | |
387 | /* |
388 | * Trim trailing slash, just in case. |
389 | */ |
390 | pathlen = strlen(rootdir); |
391 | if (pathlen > 0 && rootdir[pathlen-1] == '/') |
392 | rootdir[--pathlen] = '\0'; |
393 | |
394 | run_query(mappedfile, rootdir, t, 1); |
395 | } else if (mode == HTML) { |
396 | size_t pathlen; |
397 | unsigned long xi; |
398 | char *html; |
399 | |
400 | fd = open(filename, O_RDONLY); |
401 | if (fd < 0) { |
402 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
403 | strerror(errno)); |
404 | return 1; |
405 | } |
406 | if (fstat(fd, &st) < 0) { |
407 | perror("agedu: fstat"); |
408 | return 1; |
409 | } |
410 | totalsize = st.st_size; |
411 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
412 | if (!mappedfile) { |
413 | perror("agedu: mmap"); |
414 | return 1; |
415 | } |
416 | |
417 | /* |
418 | * Trim trailing slash, just in case. |
419 | */ |
420 | pathlen = strlen(rootdir); |
421 | if (pathlen > 0 && rootdir[pathlen-1] == '/') |
422 | rootdir[--pathlen] = '\0'; |
423 | |
424 | xi = trie_before(mappedfile, rootdir); |
425 | html = html_query(mappedfile, xi, NULL); |
426 | fputs(html, stdout); |
427 | } else if (mode == DUMP) { |
428 | size_t maxpathlen; |
429 | char *buf; |
430 | |
431 | fd = open(filename, O_RDONLY); |
432 | if (fd < 0) { |
433 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
434 | strerror(errno)); |
435 | return 1; |
436 | } |
437 | if (fstat(fd, &st) < 0) { |
438 | perror("agedu: fstat"); |
439 | return 1; |
440 | } |
441 | totalsize = st.st_size; |
442 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
443 | if (!mappedfile) { |
444 | perror("agedu: mmap"); |
445 | return 1; |
446 | } |
447 | |
448 | maxpathlen = trie_maxpathlen(mappedfile); |
449 | buf = snewn(maxpathlen, char); |
450 | |
451 | tw = triewalk_new(mappedfile); |
452 | while ((tf = triewalk_next(tw, buf)) != NULL) { |
453 | printf("%s: %llu %llu\n", buf, tf->blocks, tf->atime); |
454 | } |
455 | triewalk_free(tw); |
456 | } else if (mode == HTTPD) { |
457 | fd = open(filename, O_RDONLY); |
458 | if (fd < 0) { |
459 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
460 | strerror(errno)); |
461 | return 1; |
462 | } |
463 | if (fstat(fd, &st) < 0) { |
464 | perror("agedu: fstat"); |
465 | return 1; |
466 | } |
467 | totalsize = st.st_size; |
468 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
469 | if (!mappedfile) { |
470 | perror("agedu: mmap"); |
471 | return 1; |
472 | } |
473 | |
474 | run_httpd(mappedfile); |
475 | } |
476 | |
477 | return 0; |
478 | } |