c258919f0dc746c2d3d2c64edadfaf129a3b8ec6
2 * Main program for agedu.
16 #include <sys/types.h>
20 #include <sys/ioctl.h>
/*
 * Element count of the array x, computed as the size of the whole
 * array divided by the size of its first element. Only meaningful
 * when x is a genuine array; a pointer would yield
 * sizeof(pointer) / sizeof(element) instead.
 */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))
34 void fatal(const char *fmt
, ...)
37 fprintf(stderr
, "%s: ", PNAME
);
39 vfprintf(stderr
, fmt
, ap
);
41 fprintf(stderr
, "\n");
45 struct inclusion_exclusion
{
53 dev_t datafile_dev
, filesystem_dev
;
55 time_t last_output_update
;
56 int progress
, progwidth
;
57 struct inclusion_exclusion
*inex
;
62 static int gotdata(void *vctx
, const char *pathname
, const struct stat64
*st
)
64 struct ctx
*ctx
= (struct ctx
*)vctx
;
65 struct trie_file file
;
71 * Filter out our own data file.
73 if (st
->st_dev
== ctx
->datafile_dev
&& st
->st_ino
== ctx
->datafile_ino
)
77 * Don't cross the streams^W^Wany file system boundary.
79 if (!ctx
->crossfs
&& st
->st_dev
!= ctx
->filesystem_dev
)
82 file
.blocks
= st
->st_blocks
;
83 file
.atime
= st
->st_atime
;
86 * Filter based on wildcards.
89 filename
= strrchr(pathname
, '/');
94 for (i
= 0; i
< ctx
->ninex
; i
++) {
95 if (fnmatch(ctx
->inex
[i
].wildcard
,
96 ctx
->inex
[i
].path ? pathname
: filename
, 0) == 0)
97 include
= ctx
->inex
[i
].type
;
100 return 0; /* ignore this entry and any subdirs */
103 * Here we are supposed to be filtering an entry out, but
104 * still recursing into it if it's a directory. However,
105 * we can't actually leave out any directory whose
106 * subdirectories we then look at. So we cheat, in that
107 * case, by setting the size to zero.
109 if (!S_ISDIR(st
->st_mode
))
110 return 0; /* just ignore */
115 triebuild_add(ctx
->tb
, pathname
, &file
);
118 if (t
!= ctx
->last_output_update
) {
120 fprintf(stderr
, "%-*.*s\r", ctx
->progwidth
, ctx
->progwidth
,
124 ctx
->last_output_update
= t
;
130 static void text_query(const void *mappedfile
, const char *querydir
,
135 unsigned long xi1
, xi2
;
136 unsigned long long s1
, s2
;
138 maxpathlen
= trie_maxpathlen(mappedfile
);
139 pathbuf
= snewn(maxpathlen
+ 1, char);
142 * We want to query everything between the supplied filename
143 * (inclusive) and that filename with a ^A on the end
144 * (exclusive). So find the x indices for each.
146 sprintf(pathbuf
, "%s\001", querydir
);
147 xi1
= trie_before(mappedfile
, querydir
);
148 xi2
= trie_before(mappedfile
, pathbuf
);
151 * Now do the lookups in the age index.
153 s1
= index_query(mappedfile
, xi1
, t
);
154 s2
= index_query(mappedfile
, xi2
, t
);
156 /* Display in units of 1 KiB, i.e. two 512-byte blocks */
157 printf("%-11llu %s\n", (s2
- s1
) / 2, querydir
);
161 * Now scan for first-level subdirectories and report
166 trie_getpath(mappedfile
, xi1
, pathbuf
);
167 text_query(mappedfile
, pathbuf
, t
, depth
-1);
168 strcat(pathbuf
, "\001");
169 xi1
= trie_before(mappedfile
, pathbuf
);
175 * Largely frivolous way to define all my command-line options. I
176 * present here a parametric macro which declares a series of
177 * _logical_ option identifiers, and for each one declares zero or
178 * more short option characters and zero or more long option
179 * words. Then I repeatedly invoke that macro with its arguments
180 * defined to be various other macros, which allows me to
183 * - define an enum allocating a distinct integer value to each
185 * - define a string consisting of precisely all the short option
187 * - define a string array consisting of all the long option
189 * - define (with help from auxiliary enums) integer arrays
190 * parallel to both of the above giving the logical option id
191 * for each physical short and long option
192 * - define an array indexed by logical option id indicating
193 * whether the option in question takes a value
194 * - define a function which prints out brief online help for all
197 * It's not at all clear to me that this trickery is actually
198 * particularly _efficient_ - it still, after all, requires going
199 * linearly through the option list at run time and doing a
200 * strcmp, whereas in an ideal world I'd have liked the lists of
201 * long and short options to be pre-sorted so that a binary search
202 * or some other more efficient lookup was possible. (Not that
203 * asymptotic algorithmic complexity is remotely vital in option
204 * parsing, but if I were doing this in, say, Lisp or something
205 * with an equivalently powerful preprocessor then once I'd had
206 * the idea of preparing the option-parsing data structures at
207 * compile time I would probably have made the effort to prepare
208 * them _properly_. I could have Perl generate me a source file
209 * from some sort of description, I suppose, but that would seem
210 * like overkill. And in any case, it's more of a challenge to
211 * achieve as much as possible by cunning use of cpp and enum than
212 * to just write some sensible and logical code in a Turing-
213 * complete language. I said it was largely frivolous :-)
215 * This approach does have the virtue that it brings together the
216 * option ids, option spellings and help text into a single
217 * combined list and defines them all in exactly one place. If I
218 * want to add a new option, or a new spelling for an option, I
219 * only have to modify the main OPTHELP macro below and then add
220 * code to process the new logical id.
222 * (Though, really, even that isn't ideal, since it still involves
223 * modifying the source file in more than one place. In a
224 * _properly_ ideal world, I'd be able to interleave the option
225 * definitions with the code fragments that process them. And then
226 * not bother defining logical identifiers for them at all - those
227 * would be automatically generated, since I wouldn't have any
228 * need to specify them manually in another part of the code.)
231 #define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
232 HELPPFX("usage") HELPLINE("agedu [options] action") \
234 VAL(SCAN) SHORT(s) LONG(scan) \
235 HELPARG("directory") HELPOPT("scan and index a directory") \
236 NOVAL(DUMP) SHORT(d) LONG(dump) HELPOPT("dump the index file") \
237 VAL(TEXT) SHORT(t) LONG(text) \
238 HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
239 VAL(HTML) SHORT(H) LONG(html) \
240 HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
241 NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
242 HELPOPT("serve reports from a temporary web server") \
244 VAL(DATAFILE) SHORT(f) LONG(file) \
245 HELPARG("filename") HELPOPT("[all modes] specify index file") \
246 NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
247 HELPOPT("[--scan] report progress on stderr") \
248 NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
249 HELPOPT("[--scan] do not report progress") \
250 NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
251 LONG(progress_tty) LONG(scan_progress_tty) \
252 HELPOPT("[--scan] report progress if stderr is a tty") \
253 NOVAL(CROSSFS) LONG(cross_fs) \
254 HELPOPT("[--scan] cross filesystem boundaries") \
255 NOVAL(NOCROSSFS) LONG(no_cross_fs) \
256 HELPOPT("[--scan] stick to one filesystem") \
257 VAL(INCLUDE) LONG(include) \
258 HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
259 VAL(INCLUDEPATH) LONG(include_path) \
260 HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
261 VAL(EXCLUDE) LONG(exclude) \
262 HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
263 VAL(EXCLUDEPATH) LONG(exclude_path) \
264 HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
265 VAL(PRUNE) LONG(prune) \
266 HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
267 VAL(PRUNEPATH) LONG(prune_path) \
268 HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
269 VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
270 HELPARG("age") HELPOPT("[--text] include only files older than this") \
271 VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
272 LONG(server_auth) LONG(web_auth) \
273 HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
275 NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
276 NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
277 NOVAL(LICENCE) LONG(licence) LONG(license) \
278 HELPOPT("display (MIT) licence text") \
/*
 * Helper macros intended to be passed as the NOVAL / VAL / SHORT /
 * LONG arguments of OPTIONS(). Each one turns a single entry of the
 * option list into one fragment of an enum body or array initialiser.
 */

/* Logical option id: one enumerator OPT_<x> per option. */
#define DEFENUM(x) OPT_ ## x,

/* Spelling of x as a string literal, followed by a comma (array element). */
#define STRING(x) #x ,
/* Spelling of x as a string literal with no comma, so that adjacent
 * uses concatenate into one string. */
#define STRINGNOCOMMA(x) #x

/*
 * Short options. SHORTNEWOPT resets the running enum value to the
 * logical id of the option being started. SHORTTHISOPT then burns one
 * enumerator and defines SHORTVAL_<x> as one less than it, i.e. equal
 * to that logical id, so every further spelling of the same option
 * maps to the same id. SHORTOPTVAL reads that value back out, and
 * SHORTTMP emits one throwaway enumerator per short option so that a
 * trailing enumerator counts them.
 */
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
#define SHORTTMP(x) SHORTtmp3_ ## x,

/*
 * Long options: exact counterparts of the four macros above.
 *
 * LONGTMP uses its own LONGtmp3_ prefix. It previously pasted
 * SHORTtmp3_, and since all enumeration constants live in a single
 * namespace, a long option spelled the same as a short-option letter
 * would have produced a duplicate enumerator.
 */
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
#define LONGTMP(x) LONGtmp3_ ## x,
295 #define OPTIONS(NOVAL, VAL, SHORT, LONG) \
296 OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)
298 enum { OPTIONS(DEFENUM
,DEFENUM
,IGNORE
,IGNORE
) NOPTIONS
};
299 enum { OPTIONS(IGNORE
,IGNORE
,SHORTTMP
,IGNORE
) NSHORTOPTS
};
300 enum { OPTIONS(IGNORE
,IGNORE
,IGNORE
,LONGTMP
) NLONGOPTS
};
301 static const int opthasval
[NOPTIONS
] = {OPTIONS(ZERO
,ONE
,IGNORE
,IGNORE
)};
302 static const char shortopts
[] = {OPTIONS(IGNORE
,IGNORE
,STRINGNOCOMMA
,IGNORE
)};
303 static const char *const longopts
[] = {OPTIONS(IGNORE
,IGNORE
,IGNORE
,STRING
)};
304 enum { OPTIONS(SHORTNEWOPT
,SHORTNEWOPT
,SHORTTHISOPT
,IGNORE
) };
305 enum { OPTIONS(LONGNEWOPT
,LONGNEWOPT
,IGNORE
,LONGTHISOPT
) };
306 static const int shortvals
[] = {OPTIONS(IGNORE
,IGNORE
,SHORTOPTVAL
,IGNORE
)};
307 static const int longvals
[] = {OPTIONS(IGNORE
,IGNORE
,IGNORE
,LONGOPTVAL
)};
309 static void usage(FILE *fp
)
312 const char *prefix
, *shortopt
, *longopt
, *optarg
;
315 #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
316 #define HELPNOVAL(s) optex = 0;
317 #define HELPVAL(s) optex = 1;
318 #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
319 #define HELPLONG(s) if (!longopt) { \
320 strcpy(longbuf, "--" #s); longopt = longbuf; \
321 for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
322 #define HELPPFX(s) prefix = s;
323 #define HELPARG(s) optarg = s;
324 #define HELPLINE(s) assert(optex == -1); \
325 fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
327 #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
328 assert(shortopt || longopt); \
329 i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
330 shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
331 optarg?" ":"", optarg?optarg:""); \
332 fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;
335 OPTHELP(HELPNOVAL
, HELPVAL
, HELPSHORT
, HELPLONG
,
336 HELPPFX
, HELPARG
, HELPLINE
, HELPOPT
);
349 int main(int argc
, char **argv
)
352 struct ctx actx
, *ctx
= &actx
;
354 off_t totalsize
, realsize
;
358 const struct trie_file
*tf
;
359 char *filename
= "agedu.dat";
360 char *scandir
= NULL
;
361 char *querydir
= NULL
;
363 enum { USAGE
, TEXT
, HTML
, SCAN
, DUMP
, HTTPD
} mode
= USAGE
;
365 int auth
= HTTPD_AUTH_MAGIC
| HTTPD_AUTH_BASIC
;
367 struct inclusion_exclusion
*inex
= NULL
;
368 int ninex
= 0, inexsize
= 0;
371 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
373 static const char *const optnames
[NOPTIONS
] = {
374 OPTIONS(STRING
,STRING
,IGNORE
,IGNORE
)
377 for (i
= 0; i
< NSHORTOPTS
; i
++)
378 printf("-%c == %s [%s]\n", shortopts
[i
], optnames
[shortvals
[i
]],
379 opthasval
[shortvals
[i
]] ?
"value" : "no value");
380 for (i
= 0; i
< NLONGOPTS
; i
++)
381 printf("--%s == %s [%s]\n", longopts
[i
], optnames
[longvals
[i
]],
382 opthasval
[longvals
[i
]] ?
"value" : "no value");
389 if (doing_opts
&& *p
== '-') {
392 if (!strcmp(p
, "--")) {
403 if (wordstart
&& *p
== '-') {
405 * GNU-style long option.
408 optval
= strchr(p
, '=');
412 for (i
= 0; i
< NLONGOPTS
; i
++) {
413 const char *opt
= longopts
[i
], *s
= p
;
416 * The underscores in the option names
417 * defined above may be given by the user
418 * as underscores or dashes, or omitted
423 if (*s
== '-' || *s
== '_')
441 fprintf(stderr
, "%s: unrecognised option '--%s'\n",
446 if (!opthasval
[optid
]) {
448 fprintf(stderr
, "%s: unexpected argument to option"
449 " '--%s'\n", PNAME
, p
);
457 fprintf(stderr
, "%s: option '--%s' expects"
458 " an argument\n", PNAME
, p
);
464 p
+= strlen(p
); /* finished with this argument word */
471 for (i
= 0; i
< NSHORTOPTS
; i
++)
472 if (c
== shortopts
[i
]) {
473 optid
= shortvals
[i
];
478 fprintf(stderr
, "%s: unrecognised option '-%c'\n",
483 if (opthasval
[optid
]) {
487 } else if (--argc
> 0) {
490 fprintf(stderr
, "%s: option '-%c' expects"
491 " an argument\n", PNAME
, c
);
502 * Now actually process the option.
509 printf("FIXME: version();\n");
512 printf("FIXME: licence();\n");
538 case OPT_TTYPROGRESS
:
554 if (!strcmp(optval
, "magic"))
555 auth
= HTTPD_AUTH_MAGIC
;
556 else if (!strcmp(optval
, "basic"))
557 auth
= HTTPD_AUTH_BASIC
;
558 else if (!strcmp(optval
, "none"))
559 auth
= HTTPD_AUTH_NONE
;
560 else if (!strcmp(optval
, "default"))
561 auth
= HTTPD_AUTH_MAGIC
| HTTPD_AUTH_BASIC
;
563 fprintf(stderr
, "%s: unrecognised authentication"
564 " type '%s'\n%*s options are 'magic',"
565 " 'basic', 'none', 'default'\n",
566 PNAME
, optval
, (int)strlen(PNAME
), "");
571 case OPT_INCLUDEPATH
:
573 case OPT_EXCLUDEPATH
:
576 if (ninex
>= inexsize
) {
577 inexsize
= ninex
* 3 / 2 + 16;
578 inex
= sresize(inex
, inexsize
,
579 struct inclusion_exclusion
);
581 inex
[ninex
].path
= (optid
== OPT_INCLUDEPATH
||
582 optid
== OPT_EXCLUDEPATH
||
583 optid
== OPT_PRUNEPATH
);
584 inex
[ninex
].type
= (optid
== OPT_INCLUDE ?
1 :
585 optid
== OPT_INCLUDEPATH ?
1 :
586 optid
== OPT_EXCLUDE ?
0 :
587 optid
== OPT_EXCLUDEPATH ?
0 :
588 optid
== OPT_PRUNE ?
-1 :
589 /* optid == OPT_PRUNEPATH ? */ -1);
590 inex
[ninex
].wildcard
= optval
;
596 fprintf(stderr
, "%s: unexpected argument '%s'\n", PNAME
, p
);
604 } else if (mode
== SCAN
) {
606 fd
= open(filename
, O_RDWR
| O_TRUNC
| O_CREAT
, S_IRWXU
);
608 fprintf(stderr
, "%s: %s: open: %s\n", PNAME
, filename
,
613 if (stat(scandir
, &st
) < 0) {
614 fprintf(stderr
, "%s: %s: stat: %s\n", PNAME
, scandir
,
618 ctx
->filesystem_dev
= crossfs ?
0 : st
.st_dev
;
620 if (fstat(fd
, &st
) < 0) {
621 perror("agedu: fstat");
624 ctx
->datafile_dev
= st
.st_dev
;
625 ctx
->datafile_ino
= st
.st_ino
;
628 ctx
->crossfs
= crossfs
;
630 ctx
->last_output_update
= time(NULL
);
632 /* progress==1 means report progress only if stderr is a tty */
634 progress
= isatty(2) ?
2 : 0;
635 ctx
->progress
= progress
;
638 if (progress
&& ioctl(2, TIOCGWINSZ
, &ws
) == 0)
639 ctx
->progwidth
= ws
.ws_col
- 1;
645 * Scan the directory tree, and write out the trie component
648 ctx
->tb
= triebuild_new(fd
);
649 du(scandir
, gotdata
, ctx
);
650 count
= triebuild_finish(ctx
->tb
);
651 triebuild_free(ctx
->tb
);
654 fprintf(stderr
, "%-*s\r", ctx
->progwidth
, "");
659 * Work out how much space the cumulative index trees will
660 * take; enlarge the file, and memory-map it.
662 if (fstat(fd
, &st
) < 0) {
663 perror("agedu: fstat");
667 printf("Built pathname index, %d entries, %ju bytes\n", count
,
668 (intmax_t)st
.st_size
);
670 totalsize
= index_compute_size(st
.st_size
, count
);
672 if (lseek(fd
, totalsize
-1, SEEK_SET
) < 0) {
673 perror("agedu: lseek");
676 if (write(fd
, "\0", 1) < 1) {
677 perror("agedu: write");
681 printf("Upper bound on index file size = %ju bytes\n",
682 (intmax_t)totalsize
);
684 mappedfile
= mmap(NULL
, totalsize
, PROT_READ
|PROT_WRITE
,MAP_SHARED
, fd
, 0);
686 perror("agedu: mmap");
690 ib
= indexbuild_new(mappedfile
, st
.st_size
, count
);
691 tw
= triewalk_new(mappedfile
);
692 while ((tf
= triewalk_next(tw
, NULL
)) != NULL
)
693 indexbuild_add(ib
, tf
);
695 realsize
= indexbuild_realsize(ib
);
698 munmap(mappedfile
, totalsize
);
699 ftruncate(fd
, realsize
);
701 printf("Actual index file size = %ju bytes\n", (intmax_t)realsize
);
702 } else if (mode
== TEXT
) {
711 if (2 != sscanf(minage
, "%d%1[DdWwMmYy]", &nunits
, unit
)) {
712 fprintf(stderr
, "%s: minimum age should be a number followed by"
713 " one of d,w,m,y\n", PNAME
);
717 if (unit
[0] == 'd') {
719 } else if (unit
[0] == 'w') {
720 t
-= 86400 * 7 * nunits
;
725 ym
= tm
.tm_year
* 12 + tm
.tm_mon
;
732 tm
.tm_year
= ym
/ 12;
738 fd
= open(filename
, O_RDONLY
);
740 fprintf(stderr
, "%s: %s: open: %s\n", PNAME
, filename
,
744 if (fstat(fd
, &st
) < 0) {
745 perror("agedu: fstat");
748 totalsize
= st
.st_size
;
749 mappedfile
= mmap(NULL
, totalsize
, PROT_READ
, MAP_SHARED
, fd
, 0);
751 perror("agedu: mmap");
756 * Trim trailing slash, just in case.
758 pathlen
= strlen(querydir
);
759 if (pathlen
> 0 && querydir
[pathlen
-1] == '/')
760 querydir
[--pathlen
] = '\0';
762 text_query(mappedfile
, querydir
, t
, 1);
763 } else if (mode
== HTML
) {
768 fd
= open(filename
, O_RDONLY
);
770 fprintf(stderr
, "%s: %s: open: %s\n", PNAME
, filename
,
774 if (fstat(fd
, &st
) < 0) {
775 perror("agedu: fstat");
778 totalsize
= st
.st_size
;
779 mappedfile
= mmap(NULL
, totalsize
, PROT_READ
, MAP_SHARED
, fd
, 0);
781 perror("agedu: mmap");
786 * Trim trailing slash, just in case.
788 pathlen
= strlen(querydir
);
789 if (pathlen
> 0 && querydir
[pathlen
-1] == '/')
790 querydir
[--pathlen
] = '\0';
792 xi
= trie_before(mappedfile
, querydir
);
793 html
= html_query(mappedfile
, xi
, NULL
);
795 } else if (mode
== DUMP
) {
799 fd
= open(filename
, O_RDONLY
);
801 fprintf(stderr
, "%s: %s: open: %s\n", PNAME
, filename
,
805 if (fstat(fd
, &st
) < 0) {
806 perror("agedu: fstat");
809 totalsize
= st
.st_size
;
810 mappedfile
= mmap(NULL
, totalsize
, PROT_READ
, MAP_SHARED
, fd
, 0);
812 perror("agedu: mmap");
816 maxpathlen
= trie_maxpathlen(mappedfile
);
817 buf
= snewn(maxpathlen
, char);
819 tw
= triewalk_new(mappedfile
);
820 while ((tf
= triewalk_next(tw
, buf
)) != NULL
) {
821 printf("%s: %llu %llu\n", buf
, tf
->blocks
, tf
->atime
);
824 } else if (mode
== HTTPD
) {
825 fd
= open(filename
, O_RDONLY
);
827 fprintf(stderr
, "%s: %s: open: %s\n", PNAME
, filename
,
831 if (fstat(fd
, &st
) < 0) {
832 perror("agedu: fstat");
835 totalsize
= st
.st_size
;
836 mappedfile
= mmap(NULL
, totalsize
, PROT_READ
, MAP_SHARED
, fd
, 0);
838 perror("agedu: mmap");
842 run_httpd(mappedfile
, auth
);