Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / agedu.c
CommitLineData
70322ae3 1/*
2 * Main program for agedu.
3 */
4
353bc75d 5#include "agedu.h"
50e82fdc 6
70322ae3 7#include "du.h"
8#include "trie.h"
9#include "index.h"
995db599 10#include "alloc.h"
70322ae3 11#include "html.h"
12#include "httpd.h"
84849cbd 13#include "fgetline.h"
70322ae3 14
/*
 * Path separator. This global variable affects the behaviour of
 * various parts of the code when they need to deal with path
 * separators. The path separator appropriate to a particular data
 * set is encoded in the index file storing that data set; data
 * sets generated on Unix will of course have the default '/', but
 * foreign data sets are conceivable and must be handled correctly.
 *
 * In this file it is read when splitting a pathname into its last
 * component for wildcard matching, and it is overwritten from the
 * dump header when a dump file is loaded.
 */
char pathsep = '/';
24
70322ae3 25void fatal(const char *fmt, ...)
26{
27 va_list ap;
28 fprintf(stderr, "%s: ", PNAME);
29 va_start(ap, fmt);
30 vfprintf(stderr, fmt, ap);
31 va_end(ap);
32 fprintf(stderr, "\n");
33 exit(1);
34}
35
/*
 * One wildcard rule supplied on the command line by the include,
 * exclude or prune options. Rules are applied in order during a
 * scan and the last matching rule decides the fate of an entry.
 */
struct inclusion_exclusion {
    int type;              /* 1 = include, 0 = exclude, -1 = prune */
    const char *wildcard;  /* pattern handed to fnmatch() */
    int path;              /* nonzero: match the whole pathname rather
                            * than just its final component */
};
41
/*
 * State shared between main() and the scan callbacks (gotdata and
 * scan_error), which receive it as an opaque pointer.
 */
struct ctx {
    triebuild *tb;                      /* trie under construction (unused
                                         * when dumping straight to stdout) */
    dev_t datafile_dev, filesystem_dev; /* device of our own index file, and
                                         * device the scan is confined to */
    ino_t datafile_ino;                 /* inode of our own index file */
    time_t last_output_update;          /* time of last progress report */
    int progress, progwidth;            /* progress reporting flag, and the
                                         * width to pad/truncate the line to */
    int straight_to_dump;               /* nonzero: print dump lines instead
                                         * of adding entries to the trie */
    struct inclusion_exclusion *inex;   /* wildcard rules, in order */
    int ninex;                          /* number of entries in inex */
    int crossfs;                        /* nonzero: cross file system
                                         * boundaries */
    int usemtime;                       /* nonzero: record mtime as the
                                         * entry's atime */
    int fakeatimes;                     /* nonzero: use mtime as the atime
                                         * of directories */
};
55
84849cbd 56static void dump_line(const char *pathname, const struct trie_file *tf)
57{
58 const char *p;
44d82778 59 if (printf("%llu %llu ", tf->size, tf->atime) < 0) goto error;
84849cbd 60 for (p = pathname; *p; p++) {
44d82778 61 if (*p >= ' ' && *p < 127 && *p != '%') {
62 if (putchar(*p) == EOF) goto error;
63 } else {
64 if (printf("%%%02x", (unsigned char)*p) < 0) goto error;
65 }
84849cbd 66 }
44d82778 67 if (putchar('\n') == EOF) goto error;
68 return;
69 error:
70 fatal("standard output: %s", strerror(errno));
84849cbd 71}
72
9c6e61f2 73static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st)
70322ae3 74{
75 struct ctx *ctx = (struct ctx *)vctx;
76 struct trie_file file;
77 time_t t;
9d0b9596 78 int i, include;
79 const char *filename;
70322ae3 80
81 /*
82 * Filter out our own data file.
83 */
84 if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino)
85 return 0;
86
87 /*
88 * Don't cross the streams^W^Wany file system boundary.
70322ae3 89 */
9d0b9596 90 if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev)
70322ae3 91 return 0;
92
84849cbd 93 file.size = (unsigned long long)512 * st->st_blocks;
f59a5d34 94 if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode)))
05b0f827 95 file.atime = st->st_mtime;
96 else
7e25423c 97 file.atime = max(st->st_mtime, st->st_atime);
0ba55302 98
70322ae3 99 /*
9d0b9596 100 * Filter based on wildcards.
70322ae3 101 */
9d0b9596 102 include = 1;
373a02e5 103 filename = strrchr(pathname, pathsep);
9d0b9596 104 if (!filename)
105 filename = pathname;
106 else
107 filename++;
108 for (i = 0; i < ctx->ninex; i++) {
109 if (fnmatch(ctx->inex[i].wildcard,
0ba55302 110 ctx->inex[i].path ? pathname : filename, 0) == 0)
111 include = ctx->inex[i].type;
112 }
113 if (include == -1)
114 return 0; /* ignore this entry and any subdirs */
115 if (include == 0) {
116 /*
117 * Here we are supposed to be filtering an entry out, but
118 * still recursing into it if it's a directory. However,
119 * we can't actually leave out any directory whose
120 * subdirectories we then look at. So we cheat, in that
121 * case, by setting the size to zero.
122 */
123 if (!S_ISDIR(st->st_mode))
124 return 0; /* just ignore */
125 else
84849cbd 126 file.size = 0;
9d0b9596 127 }
70322ae3 128
84849cbd 129 if (ctx->straight_to_dump)
130 dump_line(pathname, &file);
131 else
132 triebuild_add(ctx->tb, pathname, &file);
70322ae3 133
84849cbd 134 if (ctx->progress) {
135 t = time(NULL);
136 if (t != ctx->last_output_update) {
8b1f55d6 137 fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth,
138 pathname);
139 fflush(stderr);
84849cbd 140 ctx->last_output_update = t;
8b1f55d6 141 }
70322ae3 142 }
143
144 return 1;
145}
146
09fd7619 147static void scan_error(void *vctx, const char *fmt, ...)
148{
149 struct ctx *ctx = (struct ctx *)vctx;
150 va_list ap;
151
152 if (ctx->progress) {
153 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
154 fflush(stderr);
155 }
156
157 fprintf(stderr, "%s: ", PNAME);
158 va_start(ap, fmt);
159 vfprintf(stderr, fmt, ap);
160 va_end(ap);
161
162 ctx->last_output_update--; /* force a progress report next time */
163}
164
e9e7a1bf 165static void text_query(const void *mappedfile, const char *querydir,
00c5e40c 166 time_t t, int showfiles, int depth, FILE *fp)
70322ae3 167{
168 size_t maxpathlen;
169 char *pathbuf;
170 unsigned long xi1, xi2;
16139d21 171 unsigned long long size;
70322ae3 172
173 maxpathlen = trie_maxpathlen(mappedfile);
174 pathbuf = snewn(maxpathlen + 1, char);
175
176 /*
177 * We want to query everything between the supplied filename
178 * (inclusive) and that filename with a ^A on the end
179 * (exclusive). So find the x indices for each.
180 */
256c29a2 181 strcpy(pathbuf, querydir);
182 make_successor(pathbuf);
e9e7a1bf 183 xi1 = trie_before(mappedfile, querydir);
70322ae3 184 xi2 = trie_before(mappedfile, pathbuf);
185
16139d21 186 if (!showfiles && xi2 - xi1 == 1)
0313b788 187 return; /* file, or empty dir => no display */
188
70322ae3 189 /*
190 * Now do the lookups in the age index.
191 */
16139d21 192 if (xi2 - xi1 == 1) {
193 /*
194 * We are querying an individual file, so we should not
195 * depend on the index entries either side of the node,
196 * since they almost certainly don't both exist. Instead,
197 * just look up the file's size and atime in the main trie.
198 */
199 const struct trie_file *f = trie_getfile(mappedfile, xi1);
200 if (f->atime < t)
201 size = f->size;
202 else
203 size = 0;
204 } else {
205 unsigned long long s1, s2;
206 s1 = index_query(mappedfile, xi1, t);
207 s2 = index_query(mappedfile, xi2, t);
208 size = s2 - s1;
209 }
70322ae3 210
16139d21 211 if (size == 0)
010dd2a2 212 return; /* no space taken up => no display */
213
00c5e40c 214 if (depth != 0) {
70322ae3 215 /*
216 * Now scan for first-level subdirectories and report
217 * those too.
218 */
00c5e40c 219 int newdepth = (depth > 0 ? depth - 1 : depth);
70322ae3 220 xi1++;
221 while (xi1 < xi2) {
222 trie_getpath(mappedfile, xi1, pathbuf);
00c5e40c 223 text_query(mappedfile, pathbuf, t, showfiles, newdepth, fp);
256c29a2 224 make_successor(pathbuf);
70322ae3 225 xi1 = trie_before(mappedfile, pathbuf);
226 }
227 }
16e591d6 228
229 /* Display in units of 1Kb */
00c5e40c 230 fprintf(fp, "%-11llu %s\n", (size) / 1024, querydir);
70322ae3 231}
232
56fa1896 233/*
234 * Largely frivolous way to define all my command-line options. I
235 * present here a parametric macro which declares a series of
236 * _logical_ option identifiers, and for each one declares zero or
237 * more short option characters and zero or more long option
238 * words. Then I repeatedly invoke that macro with its arguments
239 * defined to be various other macros, which allows me to
240 * variously:
241 *
242 * - define an enum allocating a distinct integer value to each
243 * logical option id
244 * - define a string consisting of precisely all the short option
245 * characters
246 * - define a string array consisting of all the long option
247 * strings
248 * - define (with help from auxiliary enums) integer arrays
249 * parallel to both of the above giving the logical option id
250 * for each physical short and long option
251 * - define an array indexed by logical option id indicating
e9e7a1bf 252 * whether the option in question takes a value
253 * - define a function which prints out brief online help for all
254 * the options.
56fa1896 255 *
256 * It's not at all clear to me that this trickery is actually
257 * particularly _efficient_ - it still, after all, requires going
258 * linearly through the option list at run time and doing a
259 * strcmp, whereas in an ideal world I'd have liked the lists of
260 * long and short options to be pre-sorted so that a binary search
261 * or some other more efficient lookup was possible. (Not that
262 * asymptotic algorithmic complexity is remotely vital in option
263 * parsing, but if I were doing this in, say, Lisp or something
264 * with an equivalently powerful preprocessor then once I'd had
265 * the idea of preparing the option-parsing data structures at
266 * compile time I would probably have made the effort to prepare
267 * them _properly_. I could have Perl generate me a source file
268 * from some sort of description, I suppose, but that would seem
269 * like overkill. And in any case, it's more of a challenge to
270 * achieve as much as possible by cunning use of cpp and enum than
271 * to just write some sensible and logical code in a Turing-
272 * complete language. I said it was largely frivolous :-)
273 *
274 * This approach does have the virtue that it brings together the
e9e7a1bf 275 * option ids, option spellings and help text into a single
276 * combined list and defines them all in exactly one place. If I
277 * want to add a new option, or a new spelling for an option, I
278 * only have to modify the main OPTHELP macro below and then add
279 * code to process the new logical id.
56fa1896 280 *
281 * (Though, really, even that isn't ideal, since it still involves
282 * modifying the source file in more than one place. In a
283 * _properly_ ideal world, I'd be able to interleave the option
284 * definitions with the code fragments that process them. And then
285 * not bother defining logical identifiers for them at all - those
286 * would be automatically generated, since I wouldn't have any
287 * need to specify them manually in another part of the code.)
c5c3510f 288 *
289 * One other helpful consequence of the enum-based structure here
290 * is that it causes a compiler error if I accidentally try to
291 * define the same option (short or long) twice.
56fa1896 292 */
293
/*
 * The master option list. Each logical option is introduced by
 * NOVAL(id) or VAL(id) (VAL meaning the option takes a value),
 * followed by any SHORT(c) and LONG(word) spellings, then HELPARG
 * (for options taking a value) and HELPOPT giving the help text.
 * HELPPFX sets the heading shown at the left of the next help line
 * and HELPLINE emits a line of free text. Underscores in LONG names
 * may be typed by the user as '-' or '_' or omitted.
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") HELPOPT("scan and index a directory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") HELPOPT("print a plain text report on a subdirectory") \
    NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \
        HELPOPT("remove the index file") \
    NOVAL(DUMP) SHORT(D) LONG(dump) HELPOPT("dump the index file on stdout") \
    NOVAL(LOAD) SHORT(L) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
    NOVAL(CGI) LONG(cgi) \
        HELPOPT("do the right thing when run from a CGI script") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") HELPOPT("[most modes] specify index file") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") HELPOPT("[--scan] include pathnames matching pattern") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
        HELPOPT("[--scan,--load] keep real atimes on directories") \
    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
        HELPOPT("[--scan,--load] fake atimes on directories") \
    NOVAL(NOEOF) LONG(no_eof) LONG(noeof) \
        HELPOPT("[--web] do not close web server on EOF") \
    NOVAL(MTIME) LONG(mtime) \
        HELPOPT("[--scan] use mtime instead of atime") \
    NOVAL(SHOWFILES) LONG(files) \
        HELPOPT("[--web,--html,--text] list individual files") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") HELPOPT("[--web,--html] set limits of colour coding") \
    VAL(OUTFILE) SHORT(o) LONG(output) \
        HELPARG("filename") HELPOPT("[--html] specify output file or directory name") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    VAL(HTMLTITLE) LONG(title) \
        HELPARG("title") HELPOPT("[--web,--html] title prefix for web pages") \
    VAL(DEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") HELPOPT("[--text,--html] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") HELPOPT("[--text] include only files older than this") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text") \
56fa1896 375
/*
 * Building blocks passed as arguments to OPTIONS()/OPTHELP() to
 * turn the option list into the various enums and tables.
 *
 *  IGNORE          expands to nothing
 *  DEFENUM         enumerator OPT_<id>
 *  ZERO / ONE      the constants 0 / 1 (for the takes-a-value table)
 *  STRING          the name as a string literal, with a comma
 *  STRINGNOCOMMA   the same without the comma, so that adjacent
 *                  literals concatenate into one string
 *  xNEWOPT         restarts the enum counter at the value OPT_<id>
 *  xTHISOPT        defines xVAL_<name> equal to the id most recently
 *                  set by xNEWOPT, leaving the counter unchanged for
 *                  the next spelling of the same option
 *  xOPTVAL         refers back to xVAL_<name>
 *  xTMP            a throwaway enumerator, used only for counting
 *
 * The short and long families use distinct identifier prefixes so
 * that a short option letter and a long option word with the same
 * spelling can never clash with each other.
 */
#define IGNORE(x)
#define DEFENUM(x) OPT_ ## x,
#define ZERO(x) 0,
#define ONE(x) 1,
#define STRING(x) #x ,
#define STRINGNOCOMMA(x) #x
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
#define SHORTTMP(x) SHORTtmp3_ ## x,
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
#define LONGTMP(x) LONGtmp3_ ## x,

/*
 * The option list with all of the help-related arguments discarded.
 */
#define OPTIONS(NOVAL, VAL, SHORT, LONG) \
    OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE)
393
/* One enumerator OPT_<id> per logical option; NOPTIONS is their count. */
enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS };
/* Count the short and long spellings (the enumerators are throwaway). */
enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS };
enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
/* Indexed by option id: 1 if the option takes a value, else 0. */
static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
/* All short option characters, concatenated into a single string. */
static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
/* All long option words, in list order. */
static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
/*
 * Auxiliary enums defining SHORTVAL_<c> and LONGVAL_<word> as the
 * logical id of the option each spelling belongs to.
 */
enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) UNUSEDENUMVAL1 };
enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) UNUSEDENUMVAL2 };
/* Parallel to shortopts / longopts: the option id for each spelling. */
static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
404
/*
 * Print brief help for every option to 'fp', generated by expanding
 * OPTHELP with the local HELP* macros below. The macros accumulate
 * the pieces describing one option in local variables; HELPOPT (or
 * HELPLINE, for free text) then prints a line and clears them.
 */
static void usage(FILE *fp)
{
    char longbuf[80];
    const char *prefix, *shortopt, *longopt, *optarg;
    int i, optex;

/* Forget everything accumulated; optex == -1 means "no option seen". */
#define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1
/* Record whether the option being described expects a value. */
#define HELPNOVAL(s) optex = 0;
#define HELPVAL(s) optex = 1;
/* Only the first short and first long spelling are displayed. */
#define HELPSHORT(s) if (!shortopt) shortopt = "-" #s;
/* Long names are shown with their underscores turned into dashes. */
#define HELPLONG(s) if (!longopt) { \
    strcpy(longbuf, "--" #s); longopt = longbuf; \
    for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; }
#define HELPPFX(s) prefix = s;
#define HELPARG(s) optarg = s;
/* Free-text line; must not appear in the middle of an option. */
#define HELPLINE(s) assert(optex == -1); \
    fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \
    HELPRESET;
/*
 * Option line: heading, spellings and argument name, then the help
 * text padded out so that it starts in a fixed column where the
 * first part is short enough.
 */
#define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \
    assert(shortopt || longopt); \
    i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \
                shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \
                optarg?" ":"", optarg?optarg:""); \
    fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET;

    HELPRESET;
    OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG,
            HELPPFX, HELPARG, HELPLINE, HELPOPT);

#undef HELPRESET
#undef HELPNOVAL
#undef HELPVAL
#undef HELPSHORT
#undef HELPLONG
#undef HELPPFX
#undef HELPARG
#undef HELPLINE
#undef HELPOPT
}
444
f2e52893 445static time_t parse_age(time_t now, const char *agestr)
446{
447 time_t t;
448 struct tm tm;
449 int nunits;
450 char unit[2];
451
452 t = now;
453
454 if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) {
455 fprintf(stderr, "%s: age specification should be a number followed by"
456 " one of d,w,m,y\n", PNAME);
457 exit(1);
458 }
459
460 if (unit[0] == 'd') {
461 t -= 86400 * nunits;
462 } else if (unit[0] == 'w') {
463 t -= 86400 * 7 * nunits;
464 } else {
465 int ym;
466
467 tm = *localtime(&t);
468 ym = tm.tm_year * 12 + tm.tm_mon;
469
470 if (unit[0] == 'm')
471 ym -= nunits;
472 else
473 ym -= 12 * nunits;
474
475 tm.tm_year = ym / 12;
476 tm.tm_mon = ym % 12;
477
478 t = mktime(&tm);
479 }
480
481 return t;
482}
483
70322ae3 484int main(int argc, char **argv)
485{
486 int fd, count;
487 struct ctx actx, *ctx = &actx;
488 struct stat st;
489 off_t totalsize, realsize;
490 void *mappedfile;
491 triewalk *tw;
492 indexbuild *ib;
14601b5d 493 const struct trie_file *tf, *prevtf;
bf53e756 494 char *filename = PNAME ".dat";
70322ae3 495 int doing_opts = 1;
355c3af7 496 enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE };
444c684c 497 struct action {
498 int mode;
499 char *arg;
500 } *actions = NULL;
501 int nactions = 0, actionsize = 0, action;
f2e52893 502 time_t now = time(NULL);
503 time_t textcutoff = now, htmlnewest = now, htmloldest = now;
504 int htmlautoagerange = 1;
6f25b662 505 const char *httpserveraddr = "localhost";
506 const char *httpserverport = NULL;
1e8d78b9 507 const char *httpauthdata = NULL;
00c5e40c 508 const char *outfile = NULL;
494ef23b 509 const char *html_title = PNAME;
812e4bf2 510 int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
8b1f55d6 511 int progress = 1;
9d0b9596 512 struct inclusion_exclusion *inex = NULL;
513 int ninex = 0, inexsize = 0;
514 int crossfs = 0;
00c5e40c 515 int depth = -1, gotdepth = 0;
05b0f827 516 int fakediratimes = 1;
f59a5d34 517 int mtime = 0;
a8a4d6d8 518 int closeoneof = 1;
16139d21 519 int showfiles = 0;
70322ae3 520
56fa1896 521#ifdef DEBUG_MAD_OPTION_PARSING_MACROS
522 {
523 static const char *const optnames[NOPTIONS] = {
524 OPTIONS(STRING,STRING,IGNORE,IGNORE)
525 };
526 int i;
527 for (i = 0; i < NSHORTOPTS; i++)
528 printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]],
529 opthasval[shortvals[i]] ? "value" : "no value");
530 for (i = 0; i < NLONGOPTS; i++)
531 printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]],
532 opthasval[longvals[i]] ? "value" : "no value");
533 }
534#endif
535
70322ae3 536 while (--argc > 0) {
537 char *p = *++argv;
70322ae3 538
539 if (doing_opts && *p == '-') {
56fa1896 540 int wordstart = 1;
541
70322ae3 542 if (!strcmp(p, "--")) {
543 doing_opts = 0;
56fa1896 544 continue;
545 }
546
547 p++;
548 while (*p) {
549 int optid = -1;
550 int i;
551 char *optval;
552
553 if (wordstart && *p == '-') {
70322ae3 554 /*
56fa1896 555 * GNU-style long option.
70322ae3 556 */
56fa1896 557 p++;
558 optval = strchr(p, '=');
559 if (optval)
560 *optval++ = '\0';
561
562 for (i = 0; i < NLONGOPTS; i++) {
563 const char *opt = longopts[i], *s = p;
564 int match = 1;
565 /*
566 * The underscores in the option names
567 * defined above may be given by the user
568 * as underscores or dashes, or omitted
569 * entirely.
570 */
571 while (*opt) {
572 if (*opt == '_') {
573 if (*s == '-' || *s == '_')
574 s++;
575 } else {
576 if (*opt != *s) {
577 match = 0;
578 break;
579 }
580 s++;
581 }
582 opt++;
583 }
584 if (match && !*s) {
585 optid = longvals[i];
586 break;
70322ae3 587 }
588 }
56fa1896 589
590 if (optid < 0) {
591 fprintf(stderr, "%s: unrecognised option '--%s'\n",
592 PNAME, p);
593 return 1;
594 }
595
596 if (!opthasval[optid]) {
597 if (optval) {
598 fprintf(stderr, "%s: unexpected argument to option"
599 " '--%s'\n", PNAME, p);
812e4bf2 600 return 1;
601 }
56fa1896 602 } else {
603 if (!optval) {
604 if (--argc > 0) {
605 optval = *++argv;
606 } else {
607 fprintf(stderr, "%s: option '--%s' expects"
608 " an argument\n", PNAME, p);
609 return 1;
610 }
9d0b9596 611 }
70322ae3 612 }
56fa1896 613
614 p += strlen(p); /* finished with this argument word */
70322ae3 615 } else {
56fa1896 616 /*
617 * Short option.
618 */
70322ae3 619 char c = *p++;
620
56fa1896 621 for (i = 0; i < NSHORTOPTS; i++)
622 if (c == shortopts[i]) {
623 optid = shortvals[i];
624 break;
625 }
626
627 if (optid < 0) {
628 fprintf(stderr, "%s: unrecognised option '-%c'\n",
629 PNAME, c);
630 return 1;
631 }
632
633 if (opthasval[optid]) {
70322ae3 634 if (*p) {
635 optval = p;
636 p += strlen(p);
637 } else if (--argc > 0) {
638 optval = *++argv;
639 } else {
56fa1896 640 fprintf(stderr, "%s: option '-%c' expects"
70322ae3 641 " an argument\n", PNAME, c);
642 return 1;
643 }
56fa1896 644 } else {
645 optval = NULL;
646 }
647 }
648
649 wordstart = 0;
650
651 /*
652 * Now actually process the option.
653 */
654 switch (optid) {
655 case OPT_HELP:
e9e7a1bf 656 usage(stdout);
56fa1896 657 return 0;
658 case OPT_VERSION:
e6fde1f7 659#ifdef PACKAGE_VERSION
660 printf("%s, revision %s\n", PNAME, PACKAGE_VERSION);
661#else
662 printf("%s: version number not available when not built"
663 " via automake\n", PNAME);
664#endif
56fa1896 665 return 0;
666 case OPT_LICENCE:
5a29503d 667 {
668 extern const char *const licence[];
669 int i;
670
671 for (i = 0; licence[i]; i++)
672 fputs(licence[i], stdout);
5a29503d 673 }
56fa1896 674 return 0;
675 case OPT_SCAN:
444c684c 676 if (nactions >= actionsize) {
677 actionsize = nactions * 3 / 2 + 16;
678 actions = sresize(actions, actionsize, struct action);
679 }
680 actions[nactions].mode = SCAN;
681 actions[nactions].arg = optval;
682 nactions++;
56fa1896 683 break;
84849cbd 684 case OPT_SCANDUMP:
444c684c 685 if (nactions >= actionsize) {
686 actionsize = nactions * 3 / 2 + 16;
687 actions = sresize(actions, actionsize, struct action);
688 }
689 actions[nactions].mode = SCANDUMP;
690 actions[nactions].arg = optval;
691 nactions++;
84849cbd 692 break;
56fa1896 693 case OPT_DUMP:
444c684c 694 if (nactions >= actionsize) {
695 actionsize = nactions * 3 / 2 + 16;
696 actions = sresize(actions, actionsize, struct action);
697 }
698 actions[nactions].mode = DUMP;
699 actions[nactions].arg = NULL;
700 nactions++;
56fa1896 701 break;
84849cbd 702 case OPT_LOAD:
444c684c 703 if (nactions >= actionsize) {
704 actionsize = nactions * 3 / 2 + 16;
705 actions = sresize(actions, actionsize, struct action);
706 }
707 actions[nactions].mode = LOAD;
708 actions[nactions].arg = NULL;
709 nactions++;
84849cbd 710 break;
56fa1896 711 case OPT_TEXT:
444c684c 712 if (nactions >= actionsize) {
713 actionsize = nactions * 3 / 2 + 16;
714 actions = sresize(actions, actionsize, struct action);
715 }
716 actions[nactions].mode = TEXT;
717 actions[nactions].arg = optval;
718 nactions++;
56fa1896 719 break;
720 case OPT_HTML:
a2d04613 721 case OPT_CGI:
444c684c 722 if (nactions >= actionsize) {
723 actionsize = nactions * 3 / 2 + 16;
724 actions = sresize(actions, actionsize, struct action);
725 }
726 actions[nactions].mode = HTML;
a2d04613 727 actions[nactions].arg = (optid == OPT_HTML ? optval :
728 NULL);
444c684c 729 nactions++;
56fa1896 730 break;
731 case OPT_HTTPD:
444c684c 732 if (nactions >= actionsize) {
733 actionsize = nactions * 3 / 2 + 16;
734 actions = sresize(actions, actionsize, struct action);
735 }
736 actions[nactions].mode = HTTPD;
737 actions[nactions].arg = NULL;
738 nactions++;
56fa1896 739 break;
355c3af7 740 case OPT_REMOVE:
741 if (nactions >= actionsize) {
742 actionsize = nactions * 3 / 2 + 16;
743 actions = sresize(actions, actionsize, struct action);
744 }
745 actions[nactions].mode = REMOVE;
746 actions[nactions].arg = NULL;
747 nactions++;
748 break;
56fa1896 749 case OPT_PROGRESS:
750 progress = 2;
751 break;
752 case OPT_NOPROGRESS:
753 progress = 0;
754 break;
755 case OPT_TTYPROGRESS:
756 progress = 1;
757 break;
758 case OPT_CROSSFS:
759 crossfs = 1;
760 break;
761 case OPT_NOCROSSFS:
762 crossfs = 0;
763 break;
05b0f827 764 case OPT_DIRATIME:
765 fakediratimes = 0;
766 break;
767 case OPT_NODIRATIME:
768 fakediratimes = 1;
769 break;
16139d21 770 case OPT_SHOWFILES:
771 showfiles = 1;
772 break;
f59a5d34 773 case OPT_MTIME:
774 mtime = 1;
775 break;
a8a4d6d8 776 case OPT_NOEOF:
777 closeoneof = 0;
778 break;
56fa1896 779 case OPT_DATAFILE:
780 filename = optval;
781 break;
00c5e40c 782 case OPT_DEPTH:
783 if (!strcasecmp(optval, "unlimited") ||
784 !strcasecmp(optval, "infinity") ||
785 !strcasecmp(optval, "infinite") ||
786 !strcasecmp(optval, "inf") ||
787 !strcasecmp(optval, "maximum") ||
788 !strcasecmp(optval, "max"))
789 depth = -1;
790 else
791 depth = atoi(optval);
792 gotdepth = 1;
793 break;
794 case OPT_OUTFILE:
795 outfile = optval;
16e591d6 796 break;
494ef23b 797 case OPT_HTMLTITLE:
798 html_title = optval;
799 break;
56fa1896 800 case OPT_MINAGE:
f2e52893 801 textcutoff = parse_age(now, optval);
802 break;
803 case OPT_AGERANGE:
804 if (!strcmp(optval, "auto")) {
805 htmlautoagerange = 1;
806 } else {
807 char *q = optval + strcspn(optval, "-:");
808 if (*q)
809 *q++ = '\0';
810 htmloldest = parse_age(now, optval);
811 htmlnewest = *q ? parse_age(now, q) : now;
812 htmlautoagerange = 0;
813 }
56fa1896 814 break;
1e8d78b9 815 case OPT_SERVERADDR:
816 {
817 char *port;
818 if (optval[0] == '[' &&
819 (port = strchr(optval, ']')) != NULL)
820 port++;
821 else
822 port = optval;
823 port += strcspn(port, ":");
6f25b662 824 if (port && *port)
1e8d78b9 825 *port++ = '\0';
6f25b662 826 if (!strcmp(optval, "ANY"))
827 httpserveraddr = NULL;
828 else
829 httpserveraddr = optval;
830 httpserverport = port;
1e8d78b9 831 }
832 break;
56fa1896 833 case OPT_AUTH:
834 if (!strcmp(optval, "magic"))
835 auth = HTTPD_AUTH_MAGIC;
836 else if (!strcmp(optval, "basic"))
837 auth = HTTPD_AUTH_BASIC;
838 else if (!strcmp(optval, "none"))
839 auth = HTTPD_AUTH_NONE;
840 else if (!strcmp(optval, "default"))
841 auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
f2e52893 842 else if (!strcmp(optval, "help") ||
843 !strcmp(optval, "list")) {
bf53e756 844 printf(PNAME ": supported HTTP authentication types"
f2e52893 845 " are:\n"
846 " magic use Linux /proc/net/tcp to"
847 " determine owner of peer socket\n"
848 " basic HTTP Basic username and"
849 " password authentication\n"
850 " default use 'magic' if possible, "
851 " otherwise fall back to 'basic'\n"
852 " none unauthenticated HTTP (if"
853 " the data file is non-confidential)\n");
854 return 0;
855 } else {
56fa1896 856 fprintf(stderr, "%s: unrecognised authentication"
857 " type '%s'\n%*s options are 'magic',"
858 " 'basic', 'none', 'default'\n",
859 PNAME, optval, (int)strlen(PNAME), "");
860 return 1;
861 }
862 break;
1e8d78b9 863 case OPT_AUTHFILE:
864 case OPT_AUTHFD:
865 {
866 int fd;
867 char namebuf[40];
868 const char *name;
869 char *authbuf;
870 int authlen, authsize;
871 int ret;
872
873 if (optid == OPT_AUTHFILE) {
874 fd = open(optval, O_RDONLY);
875 if (fd < 0) {
876 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
877 optval, strerror(errno));
878 return 1;
879 }
880 name = optval;
881 } else {
882 fd = atoi(optval);
883 name = namebuf;
884 sprintf(namebuf, "fd %d", fd);
885 }
886
887 authlen = 0;
888 authsize = 256;
889 authbuf = snewn(authsize, char);
890 while ((ret = read(fd, authbuf+authlen,
891 authsize-authlen)) > 0) {
892 authlen += ret;
893 if ((authsize - authlen) < (authsize / 16)) {
894 authsize = authlen * 3 / 2 + 4096;
895 authbuf = sresize(authbuf, authsize, char);
896 }
897 }
898 if (ret < 0) {
899 fprintf(stderr, "%s: %s: read: %s\n", PNAME,
900 name, strerror(errno));
901 return 1;
902 }
903 if (optid == OPT_AUTHFILE)
904 close(fd);
905 httpauthdata = authbuf;
906 }
907 break;
56fa1896 908 case OPT_INCLUDE:
909 case OPT_INCLUDEPATH:
910 case OPT_EXCLUDE:
911 case OPT_EXCLUDEPATH:
0ba55302 912 case OPT_PRUNE:
913 case OPT_PRUNEPATH:
56fa1896 914 if (ninex >= inexsize) {
915 inexsize = ninex * 3 / 2 + 16;
916 inex = sresize(inex, inexsize,
917 struct inclusion_exclusion);
918 }
919 inex[ninex].path = (optid == OPT_INCLUDEPATH ||
0ba55302 920 optid == OPT_EXCLUDEPATH ||
921 optid == OPT_PRUNEPATH);
922 inex[ninex].type = (optid == OPT_INCLUDE ? 1 :
923 optid == OPT_INCLUDEPATH ? 1 :
924 optid == OPT_EXCLUDE ? 0 :
925 optid == OPT_EXCLUDEPATH ? 0 :
926 optid == OPT_PRUNE ? -1 :
927 /* optid == OPT_PRUNEPATH ? */ -1);
56fa1896 928 inex[ninex].wildcard = optval;
929 ninex++;
930 break;
931 }
932 }
70322ae3 933 } else {
e9e7a1bf 934 fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p);
935 return 1;
70322ae3 936 }
937 }
938
444c684c 939 if (nactions == 0) {
e9e7a1bf 940 usage(stderr);
941 return 1;
444c684c 942 }
943
944 for (action = 0; action < nactions; action++) {
945 int mode = actions[action].mode;
946
947 if (mode == SCAN || mode == SCANDUMP || mode == LOAD) {
948 const char *scandir = actions[action].arg;
14601b5d 949
444c684c 950 if (mode == LOAD) {
951 char *buf = fgetline(stdin);
952 unsigned newpathsep;
953 buf[strcspn(buf, "\r\n")] = '\0';
bf53e756 954 if (1 != sscanf(buf, DUMPHDR "%x",
444c684c 955 &newpathsep)) {
956 fprintf(stderr, "%s: header in dump file not recognised\n",
957 PNAME);
958 return 1;
959 }
960 pathsep = (char)newpathsep;
961 sfree(buf);
84849cbd 962 }
70322ae3 963
444c684c 964 if (mode == SCAN || mode == LOAD) {
965 /*
966 * Prepare to write out the index file.
967 */
cc7db507 968 fd = open(filename, O_RDWR | O_TRUNC | O_CREAT,
969 S_IRUSR | S_IWUSR);
444c684c 970 if (fd < 0) {
971 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
972 strerror(errno));
973 return 1;
974 }
975 if (fstat(fd, &st) < 0) {
bf53e756 976 perror(PNAME ": fstat");
444c684c 977 return 1;
978 }
979 ctx->datafile_dev = st.st_dev;
980 ctx->datafile_ino = st.st_ino;
981 ctx->straight_to_dump = 0;
982 } else {
983 ctx->datafile_dev = -1;
984 ctx->datafile_ino = -1;
985 ctx->straight_to_dump = 1;
84849cbd 986 }
444c684c 987
988 if (mode == SCAN || mode == SCANDUMP) {
989 if (stat(scandir, &st) < 0) {
990 fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir,
991 strerror(errno));
992 return 1;
993 }
994 ctx->filesystem_dev = crossfs ? 0 : st.st_dev;
84849cbd 995 }
70322ae3 996
444c684c 997 ctx->inex = inex;
998 ctx->ninex = ninex;
999 ctx->crossfs = crossfs;
05b0f827 1000 ctx->fakeatimes = fakediratimes;
f59a5d34 1001 ctx->usemtime = mtime;
444c684c 1002
1003 ctx->last_output_update = time(NULL);
1004
1005 /* progress==1 means report progress only if stderr is a tty */
1006 if (progress == 1)
1007 progress = isatty(2) ? 2 : 0;
1008 ctx->progress = progress;
1009 {
1010 struct winsize ws;
9cb5a01c 1011 if (progress &&
1012 ioctl(2, TIOCGWINSZ, &ws) == 0 &&
1013 ws.ws_col > 0)
444c684c 1014 ctx->progwidth = ws.ws_col - 1;
1015 else
1016 ctx->progwidth = 79;
84849cbd 1017 }
84849cbd 1018
444c684c 1019 if (mode == SCANDUMP)
bf53e756 1020 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
8b1f55d6 1021
444c684c 1022 /*
1023 * Scan the directory tree, and write out the trie component
1024 * of the data file.
1025 */
1026 if (mode != SCANDUMP) {
1027 ctx->tb = triebuild_new(fd);
1028 }
1029 if (mode == LOAD) {
1030 char *buf;
1031 int line = 2;
1032 while ((buf = fgetline(stdin)) != NULL) {
1033 struct trie_file tf;
1034 char *p, *q;
1035
1036 buf[strcspn(buf, "\r\n")] = '\0';
1037
1038 p = buf;
1039 q = p;
1040 while (*p && *p != ' ') p++;
1041 if (!*p) {
1042 fprintf(stderr, "%s: dump file line %d: expected at least"
1043 " three fields\n", PNAME, line);
1044 return 1;
1045 }
1046 *p++ = '\0';
1047 tf.size = strtoull(q, NULL, 10);
1048 q = p;
1049 while (*p && *p != ' ') p++;
1050 if (!*p) {
1051 fprintf(stderr, "%s: dump file line %d: expected at least"
1052 " three fields\n", PNAME, line);
1053 return 1;
1054 }
1055 *p++ = '\0';
1056 tf.atime = strtoull(q, NULL, 10);
1057 q = buf;
1058 while (*p) {
1059 int c = *p;
1060 if (*p == '%') {
1061 int i;
1062 p++;
1063 c = 0;
1064 for (i = 0; i < 2; i++) {
de693987 1065 c *= 16;
444c684c 1066 if (*p >= '0' && *p <= '9')
1067 c += *p - '0';
1068 else if (*p >= 'A' && *p <= 'F')
1069 c += *p - ('A' - 10);
1070 else if (*p >= 'a' && *p <= 'f')
1071 c += *p - ('a' - 10);
1072 else {
1073 fprintf(stderr, "%s: dump file line %d: unable"
1074 " to parse hex escape\n", PNAME, line);
1075 }
1076 p++;
1077 }
1f651677 1078 } else {
1079 p++;
444c684c 1080 }
1081 *q++ = c;
444c684c 1082 }
1083 *q = '\0';
1084 triebuild_add(ctx->tb, buf, &tf);
1085 sfree(buf);
de693987 1086 line++;
444c684c 1087 }
1088 } else {
09fd7619 1089 du(scandir, gotdata, scan_error, ctx);
444c684c 1090 }
1091 if (mode != SCANDUMP) {
14601b5d 1092 size_t maxpathlen;
522edd92 1093 size_t delta;
14601b5d 1094 char *buf, *prevbuf;
1095
444c684c 1096 count = triebuild_finish(ctx->tb);
1097 triebuild_free(ctx->tb);
84849cbd 1098
444c684c 1099 if (ctx->progress) {
1100 fprintf(stderr, "%-*s\r", ctx->progwidth, "");
1101 fflush(stderr);
1102 }
84849cbd 1103
444c684c 1104 /*
1105 * Work out how much space the cumulative index trees
1106 * will take; enlarge the file, and memory-map it.
1107 */
1108 if (fstat(fd, &st) < 0) {
bf53e756 1109 perror(PNAME ": fstat");
444c684c 1110 return 1;
1111 }
84849cbd 1112
522edd92 1113 printf("Built pathname index, %d entries,"
1114 " %llu bytes of index\n", count,
50e82fdc 1115 (unsigned long long)st.st_size);
444c684c 1116
522edd92 1117 totalsize = index_initial_size(st.st_size, count);
1118 totalsize += totalsize / 10;
444c684c 1119
1120 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
bf53e756 1121 perror(PNAME ": lseek");
84849cbd 1122 return 1;
1123 }
444c684c 1124 if (write(fd, "\0", 1) < 1) {
bf53e756 1125 perror(PNAME ": write");
84849cbd 1126 return 1;
1127 }
444c684c 1128
444c684c 1129 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1130 if (!mappedfile) {
bf53e756 1131 perror(PNAME ": mmap");
444c684c 1132 return 1;
84849cbd 1133 }
444c684c 1134
05b0f827 1135 if (fakediratimes) {
1136 printf("Faking directory atimes\n");
1137 trie_fake_dir_atimes(mappedfile);
1138 }
1139
1140 printf("Building index\n");
522edd92 1141 ib = indexbuild_new(mappedfile, st.st_size, count, &delta);
14601b5d 1142 maxpathlen = trie_maxpathlen(mappedfile);
1143 buf = snewn(maxpathlen, char);
1144 prevbuf = snewn(maxpathlen, char);
444c684c 1145 tw = triewalk_new(mappedfile);
14601b5d 1146 prevbuf[0] = '\0';
1147 tf = triewalk_next(tw, buf);
1148 assert(tf);
1149 while (1) {
1150 int i;
1151
522edd92 1152 if (totalsize - indexbuild_realsize(ib) < delta) {
645dbd49 1153 const void *oldfile = mappedfile;
1154 ptrdiff_t diff;
1155
522edd92 1156 /*
1157 * Unmap the file, grow it, and remap it.
1158 */
1159 munmap(mappedfile, totalsize);
1160
1161 totalsize += delta;
1162 totalsize += totalsize / 10;
1163
1164 if (lseek(fd, totalsize-1, SEEK_SET) < 0) {
1165 perror(PNAME ": lseek");
1166 return 1;
1167 }
1168 if (write(fd, "\0", 1) < 1) {
1169 perror(PNAME ": write");
1170 return 1;
1171 }
1172
1173 mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0);
1174 if (!mappedfile) {
1175 perror(PNAME ": mmap");
1176 return 1;
1177 }
1178
1179 indexbuild_rebase(ib, mappedfile);
1180 triewalk_rebase(tw, mappedfile);
645dbd49 1181 diff = (const unsigned char *)mappedfile -
1182 (const unsigned char *)oldfile;
645dbd49 1183 if (tf)
1184 tf = (const struct trie_file *)
1185 (((const unsigned char *)tf) + diff);
522edd92 1186 }
1187
14601b5d 1188 /*
1189 * Get the next file from the index. So we are
1190 * currently holding, and have not yet
1191 * indexed, prevtf (with pathname prevbuf) and
1192 * tf (with pathname buf).
1193 */
1194 prevtf = tf;
1195 memcpy(prevbuf, buf, maxpathlen);
1196 tf = triewalk_next(tw, buf);
1197
1198 if (!tf)
1199 buf[0] = '\0';
1200
1201 /*
1202 * Find the first differing character position
1203 * between our two pathnames.
1204 */
1205 for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++);
1206
1207 /*
1208 * If prevbuf was a directory name and buf is
1209 * something inside that directory, then
1210 * trie_before() will be called on prevbuf
1211 * itself. Hence we must drop a tag before it,
1212 * so that the resulting index is usable.
1213 */
1214 if ((!prevbuf[i] && (buf[i] == pathsep ||
1215 (i > 0 && buf[i-1] == pathsep))))
1216 indexbuild_tag(ib);
1217
1218 /*
1219 * Add prevtf to the index.
1220 */
1221 indexbuild_add(ib, prevtf);
1222
1223 if (!tf) {
1224 /*
1225 * Drop an unconditional final tag, and
1226 * get out of this loop.
1227 */
1228 indexbuild_tag(ib);
1229 break;
1230 }
14601b5d 1231
1232 /*
1233 * If prevbuf was a filename inside some
1234 * directory which buf is outside, then
1235 * trie_before() will be called on some
1236 * pathname either equal to buf or epsilon
1237 * less than it. Either way, we're going to
1238 * need to drop a tag after prevtf.
1239 */
1240 if (strchr(prevbuf+i, pathsep) || !tf)
1241 indexbuild_tag(ib);
1242 }
1243
444c684c 1244 triewalk_free(tw);
1245 realsize = indexbuild_realsize(ib);
1246 indexbuild_free(ib);
1247
1248 munmap(mappedfile, totalsize);
4cc1cf91 1249 if (ftruncate(fd, realsize) < 0)
1250 fatal("%s: truncate: %s\n", filename, strerror(errno));
444c684c 1251 close(fd);
522edd92 1252 printf("Final index file size = %llu bytes\n",
50e82fdc 1253 (unsigned long long)realsize);
84849cbd 1254 }
444c684c 1255 } else if (mode == TEXT) {
1256 char *querydir = actions[action].arg;
1257 size_t pathlen;
70322ae3 1258
444c684c 1259 fd = open(filename, O_RDONLY);
1260 if (fd < 0) {
1261 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1262 strerror(errno));
1263 return 1;
1264 }
1265 if (fstat(fd, &st) < 0) {
bf53e756 1266 perror(PNAME ": fstat");
444c684c 1267 return 1;
1268 }
1269 totalsize = st.st_size;
1270 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1271 if (!mappedfile) {
bf53e756 1272 perror(PNAME ": mmap");
444c684c 1273 return 1;
84849cbd 1274 }
bb013b1f 1275 if (!trie_check_magic(mappedfile)) {
1276 fprintf(stderr, "%s: %s: magic numbers did not match\n"
1277 "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
1278 return 1;
1279 }
444c684c 1280 pathsep = trie_pathsep(mappedfile);
70322ae3 1281
84849cbd 1282 /*
444c684c 1283 * Trim trailing slash, just in case.
84849cbd 1284 */
444c684c 1285 pathlen = strlen(querydir);
1286 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1287 querydir[--pathlen] = '\0';
1288
00c5e40c 1289 if (!gotdepth)
1290 depth = 1; /* default for text mode */
1291 if (outfile != NULL) {
1292 FILE *fp = fopen(outfile, "w");
1293 if (!fp) {
1294 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
1295 outfile, strerror(errno));
1296 return 1;
1297 }
1298 text_query(mappedfile, querydir, textcutoff, showfiles,
1299 depth, fp);
1300 fclose(fp);
1301 } else {
1302 text_query(mappedfile, querydir, textcutoff, showfiles,
1303 depth, stdout);
1304 }
56cae6e1 1305
1306 munmap(mappedfile, totalsize);
444c684c 1307 } else if (mode == HTML) {
1308 char *querydir = actions[action].arg;
92d3b326 1309 size_t pathlen, maxpathlen;
1310 char *pathbuf;
444c684c 1311 struct html_config cfg;
1312 unsigned long xi;
1313 char *html;
1314
1315 fd = open(filename, O_RDONLY);
1316 if (fd < 0) {
1317 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1318 strerror(errno));
a2d04613 1319 if (!querydir) {
1320 printf("Status: 500\nContent-type: text/html\n\n"
1321 "<html><head>"
1322 "<title>500 Internal Server Error</title>"
1323 "</head><body>"
1324 "<h1>500 Internal Server Error</h1>"
1325 "<p><code>agedu</code> suffered an internal error."
1326 "</body></html>\n");
1327 return 0;
1328 }
444c684c 1329 return 1;
1330 }
84849cbd 1331 if (fstat(fd, &st) < 0) {
a2d04613 1332 fprintf(stderr, "%s: %s: fstat: %s\n", PNAME, filename,
1333 strerror(errno));
1334 if (!querydir) {
1335 printf("Status: 500\nContent-type: text/html\n\n"
1336 "<html><head>"
1337 "<title>500 Internal Server Error</title>"
1338 "</head><body>"
1339 "<h1>500 Internal Server Error</h1>"
1340 "<p><code>agedu</code> suffered an internal error."
1341 "</body></html>\n");
1342 return 0;
1343 }
84849cbd 1344 return 1;
1345 }
444c684c 1346 totalsize = st.st_size;
1347 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1348 if (!mappedfile) {
a2d04613 1349 fprintf(stderr, "%s: %s: mmap: %s\n", PNAME, filename,
1350 strerror(errno));
1351 if (!querydir) {
1352 printf("Status: 500\nContent-type: text/html\n\n"
1353 "<html><head>"
1354 "<title>500 Internal Server Error</title>"
1355 "</head><body>"
1356 "<h1>500 Internal Server Error</h1>"
1357 "<p><code>agedu</code> suffered an internal error."
1358 "</body></html>\n");
1359 return 0;
1360 }
444c684c 1361 return 1;
1362 }
bb013b1f 1363 if (!trie_check_magic(mappedfile)) {
1364 fprintf(stderr, "%s: %s: magic numbers did not match\n"
1365 "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
1366 if (!querydir) {
1367 printf("Status: 500\nContent-type: text/html\n\n"
1368 "<html><head>"
1369 "<title>500 Internal Server Error</title>"
1370 "</head><body>"
1371 "<h1>500 Internal Server Error</h1>"
1372 "<p><code>agedu</code> suffered an internal error."
1373 "</body></html>\n");
1374 return 0;
1375 }
1376 return 1;
1377 }
444c684c 1378 pathsep = trie_pathsep(mappedfile);
70322ae3 1379
92d3b326 1380 maxpathlen = trie_maxpathlen(mappedfile);
1381 pathbuf = snewn(maxpathlen, char);
1382
c47f39de 1383 if (!querydir || !gotdepth) {
1384 /*
1385 * Single output file.
1386 */
1387 if (!querydir) {
1388 cfg.uriformat = "/%|/%p/%|%|/%p";
1389 } else {
1390 cfg.uriformat = NULL;
1391 }
1392 cfg.autoage = htmlautoagerange;
1393 cfg.oldest = htmloldest;
1394 cfg.newest = htmlnewest;
1395 cfg.showfiles = showfiles;
1396 } else {
1397 cfg.uriformat = "/index.html%|/%/p.html";
1398 cfg.fileformat = "/index.html%|/%/p.html";
1399 cfg.autoage = htmlautoagerange;
1400 cfg.oldest = htmloldest;
1401 cfg.newest = htmlnewest;
1402 cfg.showfiles = showfiles;
1403 }
494ef23b 1404 cfg.html_title = html_title;
c47f39de 1405
a2d04613 1406 if (!querydir) {
1407 /*
1408 * If we're run in --cgi mode, read PATH_INFO to get
1409 * a numeric pathname index.
1410 */
1411 char *path_info = getenv("PATH_INFO");
1412
1413 if (!path_info)
1414 path_info = "";
444c684c 1415
c47f39de 1416 /*
1417 * Parse the path.
1418 */
1419 if (!html_parse_path(mappedfile, path_info, &cfg, &xi)) {
1420 printf("Status: 404\nContent-type: text/html\n\n"
1421 "<html><head>"
1422 "<title>404 Not Found</title>"
1423 "</head><body>"
1424 "<h1>400 Not Found</h1>"
1425 "<p>Invalid <code>agedu</code> pathname."
1426 "</body></html>\n");
1427 return 0;
1428 }
1429
a2d04613 1430 /*
c47f39de 1431 * If the path was parseable but not canonically
1432 * expressed, return a redirect to the canonical
1433 * version.
a2d04613 1434 */
c47f39de 1435 char *canonpath = html_format_path(mappedfile, &cfg, xi);
1436 if (strcmp(canonpath, path_info)) {
a2d04613 1437 char *servername = getenv("SERVER_NAME");
1438 char *scriptname = getenv("SCRIPT_NAME");
1439 if (!servername || !scriptname) {
1440 if (servername)
1441 fprintf(stderr, "%s: SCRIPT_NAME unset\n", PNAME);
1442 else if (scriptname)
1443 fprintf(stderr, "%s: SCRIPT_NAME unset\n", PNAME);
1444 else
1445 fprintf(stderr, "%s: SERVER_NAME and "
1446 "SCRIPT_NAME both unset\n", PNAME);
1447 printf("Status: 500\nContent-type: text/html\n\n"
1448 "<html><head>"
1449 "<title>500 Internal Server Error</title>"
1450 "</head><body>"
1451 "<h1>500 Internal Server Error</h1>"
1452 "<p><code>agedu</code> suffered an internal "
1453 "error."
1454 "</body></html>\n");
1455 return 0;
1456 }
1457 printf("Status: 301\n"
c47f39de 1458 "Location: http://%s/%s%s\n"
a2d04613 1459 "Content-type: text/html\n\n"
1460 "<html><head>"
1461 "<title>301 Moved</title>"
1462 "</head><body>"
1463 "<h1>301 Moved</h1>"
1464 "<p>Moved."
1465 "</body></html>\n",
c47f39de 1466 servername, scriptname, canonpath);
a2d04613 1467 return 0;
1468 }
a2d04613 1469
a2d04613 1470 } else {
1471 /*
1472 * In ordinary --html mode, process a query
1473 * directory passed in on the command line.
1474 */
1475
1476 /*
1477 * Trim trailing slash, just in case.
1478 */
1479 pathlen = strlen(querydir);
1480 if (pathlen > 0 && querydir[pathlen-1] == pathsep)
1481 querydir[--pathlen] = '\0';
1482
1483 xi = trie_before(mappedfile, querydir);
1484 if (xi >= trie_count(mappedfile) ||
1485 (trie_getpath(mappedfile, xi, pathbuf),
1486 strcmp(pathbuf, querydir))) {
1487 fprintf(stderr, "%s: pathname '%s' does not exist in index\n"
1488 "%*s(check it is spelled exactly as it is in the "
1489 "index, including\n%*sany leading './')\n",
1490 PNAME, querydir,
1491 (int)(1+sizeof(PNAME)), "",
1492 (int)(1+sizeof(PNAME)), "");
1493 return 1;
1494 } else if (!index_has_root(mappedfile, xi)) {
1495 fprintf(stderr, "%s: pathname '%s' is"
1496 " a file, not a directory\n", PNAME, querydir);
1497 return 1;
1498 }
1499 }
1500
1501 if (!querydir || !gotdepth) {
00c5e40c 1502 /*
1503 * Single output file.
1504 */
a2d04613 1505 html = html_query(mappedfile, xi, &cfg, 1);
1506 if (querydir && outfile != NULL) {
00c5e40c 1507 FILE *fp = fopen(outfile, "w");
1508 if (!fp) {
1509 fprintf(stderr, "%s: %s: open: %s\n", PNAME,
1510 outfile, strerror(errno));
1511 return 1;
1512 } else if (fputs(html, fp) < 0) {
1513 fprintf(stderr, "%s: %s: write: %s\n", PNAME,
1514 outfile, strerror(errno));
1515 fclose(fp);
1516 return 1;
1517 } else if (fclose(fp) < 0) {
1518 fprintf(stderr, "%s: %s: fclose: %s\n", PNAME,
1519 outfile, strerror(errno));
1520 return 1;
1521 }
1522 } else {
a2d04613 1523 if (!querydir) {
1524 printf("Content-type: text/html\n\n");
1525 }
00c5e40c 1526 fputs(html, stdout);
1527 }
1528 } else {
1529 /*
1530 * Multiple output files.
1531 */
1532 int dirlen = outfile ? 2+strlen(outfile) : 3;
1533 char prefix[dirlen];
144550c6 1534 if (outfile) {
1535 if (mkdir(outfile, 0777) < 0 && errno != EEXIST) {
1536 fprintf(stderr, "%s: %s: mkdir: %s\n", PNAME,
1537 outfile, strerror(errno));
1538 return 1;
1539 }
00c5e40c 1540 snprintf(prefix, dirlen, "%s/", outfile);
144550c6 1541 } else
00c5e40c 1542 snprintf(prefix, dirlen, "./");
1543
1544 unsigned long xi2;
a2d04613 1545 /*
1546 * pathbuf is only set up in the plain-HTML case and
1547 * not in the CGI case; but that's OK, because the
1548 * CGI case can't come to this branch of the if
1549 * anyway.
1550 */
00c5e40c 1551 make_successor(pathbuf);
1552 xi2 = trie_before(mappedfile, pathbuf);
1553
00c5e40c 1554 if (html_dump(mappedfile, xi, xi2, depth, &cfg, prefix))
1555 return 1;
92d3b326 1556 }
56cae6e1 1557
1558 munmap(mappedfile, totalsize);
92d3b326 1559 sfree(pathbuf);
444c684c 1560 } else if (mode == DUMP) {
1561 size_t maxpathlen;
1562 char *buf;
70322ae3 1563
444c684c 1564 fd = open(filename, O_RDONLY);
1565 if (fd < 0) {
1566 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1567 strerror(errno));
84849cbd 1568 return 1;
1569 }
444c684c 1570 if (fstat(fd, &st) < 0) {
bf53e756 1571 perror(PNAME ": fstat");
84849cbd 1572 return 1;
1573 }
444c684c 1574 totalsize = st.st_size;
1575 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
84849cbd 1576 if (!mappedfile) {
bf53e756 1577 perror(PNAME ": mmap");
84849cbd 1578 return 1;
1579 }
bb013b1f 1580 if (!trie_check_magic(mappedfile)) {
1581 fprintf(stderr, "%s: %s: magic numbers did not match\n"
1582 "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
1583 return 1;
1584 }
444c684c 1585 pathsep = trie_pathsep(mappedfile);
1586
1587 maxpathlen = trie_maxpathlen(mappedfile);
1588 buf = snewn(maxpathlen, char);
84849cbd 1589
bf53e756 1590 printf(DUMPHDR "%02x\n", (unsigned char)pathsep);
84849cbd 1591 tw = triewalk_new(mappedfile);
444c684c 1592 while ((tf = triewalk_next(tw, buf)) != NULL)
1593 dump_line(buf, tf);
84849cbd 1594 triewalk_free(tw);
56cae6e1 1595
1596 munmap(mappedfile, totalsize);
444c684c 1597 } else if (mode == HTTPD) {
1598 struct html_config pcfg;
1599 struct httpd_config dcfg;
70322ae3 1600
444c684c 1601 fd = open(filename, O_RDONLY);
1602 if (fd < 0) {
1603 fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
1604 strerror(errno));
1605 return 1;
1606 }
1607 if (fstat(fd, &st) < 0) {
bf53e756 1608 perror(PNAME ": fstat");
444c684c 1609 return 1;
1610 }
1611 totalsize = st.st_size;
1612 mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
1613 if (!mappedfile) {
bf53e756 1614 perror(PNAME ": mmap");
444c684c 1615 return 1;
1616 }
bb013b1f 1617 if (!trie_check_magic(mappedfile)) {
1618 fprintf(stderr, "%s: %s: magic numbers did not match\n"
1619 "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
1620 return 1;
1621 }
444c684c 1622 pathsep = trie_pathsep(mappedfile);
1623
1624 dcfg.address = httpserveraddr;
1625 dcfg.port = httpserverport;
a8a4d6d8 1626 dcfg.closeoneof = closeoneof;
444c684c 1627 dcfg.basicauthdata = httpauthdata;
c47f39de 1628 pcfg.uriformat = "/%|/%p/%|%|/%p";
444c684c 1629 pcfg.autoage = htmlautoagerange;
1630 pcfg.oldest = htmloldest;
1631 pcfg.newest = htmlnewest;
16139d21 1632 pcfg.showfiles = showfiles;
494ef23b 1633 pcfg.html_title = html_title;
444c684c 1634 run_httpd(mappedfile, auth, &dcfg, &pcfg);
56cae6e1 1635 munmap(mappedfile, totalsize);
355c3af7 1636 } else if (mode == REMOVE) {
1637 if (remove(filename) < 0) {
1638 fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename,
1639 strerror(errno));
1640 return 1;
1641 }
70322ae3 1642 }
70322ae3 1643 }
1644
1645 return 0;
1646}