70322ae3 |
1 | /* |
2 | * Main program for agedu. |
3 | */ |
4 | |
353bc75d |
5 | #include "agedu.h" |
50e82fdc |
6 | |
70322ae3 |
7 | #include "du.h" |
8 | #include "trie.h" |
9 | #include "index.h" |
995db599 |
10 | #include "alloc.h" |
70322ae3 |
11 | #include "html.h" |
12 | #include "httpd.h" |
84849cbd |
13 | #include "fgetline.h" |
70322ae3 |
14 | |
373a02e5 |
/*
 * The character that separates path components in the data set
 * currently being handled. It is a global because several parts of
 * the program need to split or join pathnames. Each index file
 * records the separator of the data it holds: data generated on Unix
 * uses the default '/', but an index produced elsewhere may use a
 * different separator and must still be processed correctly.
 */
char pathsep = '/';
24 | |
70322ae3 |
25 | void fatal(const char *fmt, ...) |
26 | { |
27 | va_list ap; |
28 | fprintf(stderr, "%s: ", PNAME); |
29 | va_start(ap, fmt); |
30 | vfprintf(stderr, fmt, ap); |
31 | va_end(ap); |
32 | fprintf(stderr, "\n"); |
33 | exit(1); |
34 | } |
35 | |
9d0b9596 |
/*
 * One wildcard filtering rule. The rules are consulted in order for
 * every scanned entry and the last rule that matches decides its fate.
 */
struct inclusion_exclusion {
    int type;               /* verdict on match: 1 include, 0 exclude, -1 prune */
    const char *wildcard;   /* pattern handed to fnmatch() */
    int path;               /* nonzero: match the whole pathname;
                             * zero: match only the final component */
};
41 | |
70322ae3 |
42 | struct ctx { |
43 | triebuild *tb; |
44 | dev_t datafile_dev, filesystem_dev; |
45 | ino_t datafile_ino; |
46 | time_t last_output_update; |
8b1f55d6 |
47 | int progress, progwidth; |
84849cbd |
48 | int straight_to_dump; |
9d0b9596 |
49 | struct inclusion_exclusion *inex; |
50 | int ninex; |
51 | int crossfs; |
f59a5d34 |
52 | int usemtime; |
05b0f827 |
53 | int fakeatimes; |
70322ae3 |
54 | }; |
55 | |
84849cbd |
56 | static void dump_line(const char *pathname, const struct trie_file *tf) |
57 | { |
58 | const char *p; |
44d82778 |
59 | if (printf("%llu %llu ", tf->size, tf->atime) < 0) goto error; |
84849cbd |
60 | for (p = pathname; *p; p++) { |
44d82778 |
61 | if (*p >= ' ' && *p < 127 && *p != '%') { |
62 | if (putchar(*p) == EOF) goto error; |
63 | } else { |
64 | if (printf("%%%02x", (unsigned char)*p) < 0) goto error; |
65 | } |
84849cbd |
66 | } |
44d82778 |
67 | if (putchar('\n') == EOF) goto error; |
68 | return; |
69 | error: |
70 | fatal("standard output: %s", strerror(errno)); |
84849cbd |
71 | } |
72 | |
9c6e61f2 |
73 | static int gotdata(void *vctx, const char *pathname, const STRUCT_STAT *st) |
70322ae3 |
74 | { |
75 | struct ctx *ctx = (struct ctx *)vctx; |
76 | struct trie_file file; |
77 | time_t t; |
9d0b9596 |
78 | int i, include; |
79 | const char *filename; |
70322ae3 |
80 | |
81 | /* |
82 | * Filter out our own data file. |
83 | */ |
84 | if (st->st_dev == ctx->datafile_dev && st->st_ino == ctx->datafile_ino) |
85 | return 0; |
86 | |
87 | /* |
88 | * Don't cross the streams^W^Wany file system boundary. |
70322ae3 |
89 | */ |
9d0b9596 |
90 | if (!ctx->crossfs && st->st_dev != ctx->filesystem_dev) |
70322ae3 |
91 | return 0; |
92 | |
84849cbd |
93 | file.size = (unsigned long long)512 * st->st_blocks; |
f59a5d34 |
94 | if (ctx->usemtime || (ctx->fakeatimes && S_ISDIR(st->st_mode))) |
05b0f827 |
95 | file.atime = st->st_mtime; |
96 | else |
7e25423c |
97 | file.atime = max(st->st_mtime, st->st_atime); |
0ba55302 |
98 | |
70322ae3 |
99 | /* |
9d0b9596 |
100 | * Filter based on wildcards. |
70322ae3 |
101 | */ |
9d0b9596 |
102 | include = 1; |
373a02e5 |
103 | filename = strrchr(pathname, pathsep); |
9d0b9596 |
104 | if (!filename) |
105 | filename = pathname; |
106 | else |
107 | filename++; |
108 | for (i = 0; i < ctx->ninex; i++) { |
109 | if (fnmatch(ctx->inex[i].wildcard, |
0ba55302 |
110 | ctx->inex[i].path ? pathname : filename, 0) == 0) |
111 | include = ctx->inex[i].type; |
112 | } |
113 | if (include == -1) |
114 | return 0; /* ignore this entry and any subdirs */ |
115 | if (include == 0) { |
116 | /* |
117 | * Here we are supposed to be filtering an entry out, but |
118 | * still recursing into it if it's a directory. However, |
119 | * we can't actually leave out any directory whose |
120 | * subdirectories we then look at. So we cheat, in that |
121 | * case, by setting the size to zero. |
122 | */ |
123 | if (!S_ISDIR(st->st_mode)) |
124 | return 0; /* just ignore */ |
125 | else |
84849cbd |
126 | file.size = 0; |
9d0b9596 |
127 | } |
70322ae3 |
128 | |
84849cbd |
129 | if (ctx->straight_to_dump) |
130 | dump_line(pathname, &file); |
131 | else |
132 | triebuild_add(ctx->tb, pathname, &file); |
70322ae3 |
133 | |
84849cbd |
134 | if (ctx->progress) { |
135 | t = time(NULL); |
136 | if (t != ctx->last_output_update) { |
8b1f55d6 |
137 | fprintf(stderr, "%-*.*s\r", ctx->progwidth, ctx->progwidth, |
138 | pathname); |
139 | fflush(stderr); |
84849cbd |
140 | ctx->last_output_update = t; |
8b1f55d6 |
141 | } |
70322ae3 |
142 | } |
143 | |
144 | return 1; |
145 | } |
146 | |
09fd7619 |
147 | static void scan_error(void *vctx, const char *fmt, ...) |
148 | { |
149 | struct ctx *ctx = (struct ctx *)vctx; |
150 | va_list ap; |
151 | |
152 | if (ctx->progress) { |
153 | fprintf(stderr, "%-*s\r", ctx->progwidth, ""); |
154 | fflush(stderr); |
155 | } |
156 | |
157 | fprintf(stderr, "%s: ", PNAME); |
158 | va_start(ap, fmt); |
159 | vfprintf(stderr, fmt, ap); |
160 | va_end(ap); |
161 | |
162 | ctx->last_output_update--; /* force a progress report next time */ |
163 | } |
164 | |
e9e7a1bf |
165 | static void text_query(const void *mappedfile, const char *querydir, |
00c5e40c |
166 | time_t t, int showfiles, int depth, FILE *fp) |
70322ae3 |
167 | { |
168 | size_t maxpathlen; |
169 | char *pathbuf; |
170 | unsigned long xi1, xi2; |
16139d21 |
171 | unsigned long long size; |
70322ae3 |
172 | |
173 | maxpathlen = trie_maxpathlen(mappedfile); |
174 | pathbuf = snewn(maxpathlen + 1, char); |
175 | |
176 | /* |
177 | * We want to query everything between the supplied filename |
178 | * (inclusive) and that filename with a ^A on the end |
179 | * (exclusive). So find the x indices for each. |
180 | */ |
256c29a2 |
181 | strcpy(pathbuf, querydir); |
182 | make_successor(pathbuf); |
e9e7a1bf |
183 | xi1 = trie_before(mappedfile, querydir); |
70322ae3 |
184 | xi2 = trie_before(mappedfile, pathbuf); |
185 | |
16139d21 |
186 | if (!showfiles && xi2 - xi1 == 1) |
0313b788 |
187 | return; /* file, or empty dir => no display */ |
188 | |
70322ae3 |
189 | /* |
190 | * Now do the lookups in the age index. |
191 | */ |
16139d21 |
192 | if (xi2 - xi1 == 1) { |
193 | /* |
194 | * We are querying an individual file, so we should not |
195 | * depend on the index entries either side of the node, |
196 | * since they almost certainly don't both exist. Instead, |
197 | * just look up the file's size and atime in the main trie. |
198 | */ |
199 | const struct trie_file *f = trie_getfile(mappedfile, xi1); |
200 | if (f->atime < t) |
201 | size = f->size; |
202 | else |
203 | size = 0; |
204 | } else { |
205 | unsigned long long s1, s2; |
206 | s1 = index_query(mappedfile, xi1, t); |
207 | s2 = index_query(mappedfile, xi2, t); |
208 | size = s2 - s1; |
209 | } |
70322ae3 |
210 | |
16139d21 |
211 | if (size == 0) |
010dd2a2 |
212 | return; /* no space taken up => no display */ |
213 | |
00c5e40c |
214 | if (depth != 0) { |
70322ae3 |
215 | /* |
216 | * Now scan for first-level subdirectories and report |
217 | * those too. |
218 | */ |
00c5e40c |
219 | int newdepth = (depth > 0 ? depth - 1 : depth); |
70322ae3 |
220 | xi1++; |
221 | while (xi1 < xi2) { |
222 | trie_getpath(mappedfile, xi1, pathbuf); |
00c5e40c |
223 | text_query(mappedfile, pathbuf, t, showfiles, newdepth, fp); |
256c29a2 |
224 | make_successor(pathbuf); |
70322ae3 |
225 | xi1 = trie_before(mappedfile, pathbuf); |
226 | } |
227 | } |
16e591d6 |
228 | |
229 | /* Display in units of 1Kb */ |
00c5e40c |
230 | fprintf(fp, "%-11llu %s\n", (size) / 1024, querydir); |
70322ae3 |
231 | } |
232 | |
56fa1896 |
233 | /* |
234 | * Largely frivolous way to define all my command-line options. I |
235 | * present here a parametric macro which declares a series of |
236 | * _logical_ option identifiers, and for each one declares zero or |
237 | * more short option characters and zero or more long option |
238 | * words. Then I repeatedly invoke that macro with its arguments |
239 | * defined to be various other macros, which allows me to |
240 | * variously: |
241 | * |
242 | * - define an enum allocating a distinct integer value to each |
243 | * logical option id |
244 | * - define a string consisting of precisely all the short option |
245 | * characters |
246 | * - define a string array consisting of all the long option |
247 | * strings |
248 | * - define (with help from auxiliary enums) integer arrays |
249 | * parallel to both of the above giving the logical option id |
250 | * for each physical short and long option |
251 | * - define an array indexed by logical option id indicating |
e9e7a1bf |
252 | * whether the option in question takes a value |
253 | * - define a function which prints out brief online help for all |
254 | * the options. |
56fa1896 |
255 | * |
256 | * It's not at all clear to me that this trickery is actually |
257 | * particularly _efficient_ - it still, after all, requires going |
258 | * linearly through the option list at run time and doing a |
259 | * strcmp, whereas in an ideal world I'd have liked the lists of |
260 | * long and short options to be pre-sorted so that a binary search |
261 | * or some other more efficient lookup was possible. (Not that |
262 | * asymptotic algorithmic complexity is remotely vital in option |
263 | * parsing, but if I were doing this in, say, Lisp or something |
264 | * with an equivalently powerful preprocessor then once I'd had |
265 | * the idea of preparing the option-parsing data structures at |
266 | * compile time I would probably have made the effort to prepare |
267 | * them _properly_. I could have Perl generate me a source file |
268 | * from some sort of description, I suppose, but that would seem |
269 | * like overkill. And in any case, it's more of a challenge to |
270 | * achieve as much as possible by cunning use of cpp and enum than |
271 | * to just write some sensible and logical code in a Turing- |
272 | * complete language. I said it was largely frivolous :-) |
273 | * |
274 | * This approach does have the virtue that it brings together the |
e9e7a1bf |
275 | * option ids, option spellings and help text into a single |
276 | * combined list and defines them all in exactly one place. If I |
277 | * want to add a new option, or a new spelling for an option, I |
278 | * only have to modify the main OPTHELP macro below and then add |
279 | * code to process the new logical id. |
56fa1896 |
280 | * |
281 | * (Though, really, even that isn't ideal, since it still involves |
282 | * modifying the source file in more than one place. In a |
283 | * _properly_ ideal world, I'd be able to interleave the option |
284 | * definitions with the code fragments that process them. And then |
285 | * not bother defining logical identifiers for them at all - those |
286 | * would be automatically generated, since I wouldn't have any |
287 | * need to specify them manually in another part of the code.) |
c5c3510f |
288 | * |
289 | * One other helpful consequence of the enum-based structure here |
290 | * is that it causes a compiler error if I accidentally try to |
291 | * define the same option (short or long) twice. |
56fa1896 |
292 | */ |
293 | |
e9e7a1bf |
/*
 * The master option list. Every logical option starts with NOVAL(id)
 * or VAL(id), followed by its SHORT() and LONG() spellings, an
 * optional HELPARG() naming its argument, and a HELPOPT() giving its
 * help text. HELPPFX() starts a new section of the help output and
 * HELPLINE() emits a free-standing help line.
 */
#define OPTHELP(NOVAL, VAL, SHORT, LONG, HELPPFX, HELPARG, HELPLINE, HELPOPT) \
    HELPPFX("usage") \
        HELPLINE(PNAME " [options] action [action...]") \
    HELPPFX("actions") \
    VAL(SCAN) SHORT(s) LONG(scan) \
        HELPARG("directory") \
        HELPOPT("scan and index a directory") \
    NOVAL(HTTPD) SHORT(w) LONG(web) LONG(server) LONG(httpd) \
        HELPOPT("serve HTML reports from a temporary web server") \
    VAL(TEXT) SHORT(t) LONG(text) \
        HELPARG("subdir") \
        HELPOPT("print a plain text report on a subdirectory") \
    NOVAL(REMOVE) SHORT(R) LONG(remove) LONG(delete) LONG(unlink) \
        HELPOPT("remove the index file") \
    NOVAL(DUMP) SHORT(D) LONG(dump) \
        HELPOPT("dump the index file on stdout") \
    NOVAL(LOAD) SHORT(L) LONG(load) \
        HELPOPT("load and index a dump file") \
    VAL(SCANDUMP) SHORT(S) LONG(scan_dump) \
        HELPARG("directory") \
        HELPOPT("scan only, generating a dump") \
    VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") \
        HELPOPT("print an HTML report on a subdirectory") \
    HELPPFX("options") \
    VAL(DATAFILE) SHORT(f) LONG(file) \
        HELPARG("filename") \
        HELPOPT("[most modes] specify index file") \
    NOVAL(CROSSFS) LONG(cross_fs) \
        HELPOPT("[--scan] cross filesystem boundaries") \
    NOVAL(NOCROSSFS) LONG(no_cross_fs) \
        HELPOPT("[--scan] stick to one filesystem") \
    VAL(PRUNE) LONG(prune) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] prune files matching pattern") \
    VAL(PRUNEPATH) LONG(prune_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] prune pathnames matching pattern") \
    VAL(EXCLUDE) LONG(exclude) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] exclude files matching pattern") \
    VAL(EXCLUDEPATH) LONG(exclude_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] exclude pathnames matching pattern") \
    VAL(INCLUDE) LONG(include) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] include files matching pattern") \
    VAL(INCLUDEPATH) LONG(include_path) \
        HELPARG("wildcard") \
        HELPOPT("[--scan] include pathnames matching pattern") \
    NOVAL(PROGRESS) LONG(progress) LONG(scan_progress) \
        HELPOPT("[--scan] report progress on stderr") \
    NOVAL(NOPROGRESS) LONG(no_progress) LONG(no_scan_progress) \
        HELPOPT("[--scan] do not report progress") \
    NOVAL(TTYPROGRESS) LONG(tty_progress) LONG(tty_scan_progress) \
        LONG(progress_tty) LONG(scan_progress_tty) \
        HELPOPT("[--scan] report progress if stderr is a tty") \
    NOVAL(DIRATIME) LONG(dir_atime) LONG(dir_atimes) \
        HELPOPT("[--scan,--load] keep real atimes on directories") \
    NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
        HELPOPT("[--scan,--load] fake atimes on directories") \
    NOVAL(MTIME) LONG(mtime) \
        HELPOPT("[--scan] use mtime instead of atime") \
    NOVAL(SHOWFILES) LONG(files) \
        HELPOPT("[--web,--html,--text] list individual files") \
    VAL(AGERANGE) SHORT(r) LONG(age_range) LONG(range) LONG(ages) \
        HELPARG("age[-age]") \
        HELPOPT("[--web,--html] set limits of colour coding") \
    VAL(OUTFILE) SHORT(o) LONG(output) \
        HELPARG("filename") \
        HELPOPT("[--html] specify output file or directory name") \
    VAL(SERVERADDR) LONG(address) LONG(addr) LONG(server_address) \
        LONG(server_addr) \
        HELPARG("addr[:port]") \
        HELPOPT("[--web] specify HTTP server address") \
    VAL(AUTH) LONG(auth) LONG(http_auth) LONG(httpd_auth) \
        LONG(server_auth) LONG(web_auth) \
        HELPARG("type") \
        HELPOPT("[--web] specify HTTP authentication method") \
    VAL(AUTHFILE) LONG(auth_file) \
        HELPARG("filename") \
        HELPOPT("[--web] read HTTP Basic user/pass from file") \
    VAL(AUTHFD) LONG(auth_fd) \
        HELPARG("fd") \
        HELPOPT("[--web] read HTTP Basic user/pass from fd") \
    VAL(DEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
        HELPARG("levels") \
        HELPOPT("[--text,--html] recurse to this many levels") \
    VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
        HELPARG("age") \
        HELPOPT("[--text] include only files older than this") \
    HELPPFX("also") \
    NOVAL(HELP) SHORT(h) LONG(help) \
        HELPOPT("display this help text") \
    NOVAL(VERSION) SHORT(V) LONG(version) \
        HELPOPT("report version number") \
    NOVAL(LICENCE) LONG(licence) LONG(license) \
        HELPOPT("display (MIT) licence text")
56fa1896 |
369 | |
/*
 * Small expander macros passed as arguments to OPTIONS() / OPTHELP()
 * to generate the option tables.
 */

/* Discard the entry altogether. */
#define IGNORE(x)
/* Declare the logical option id OPT_<x> as an enumerator. */
#define DEFENUM(x) OPT_ ## x,
/* Array elements 0 and 1, for the "takes a value" table. */
#define ZERO(x) 0,
#define ONE(x) 1,
/* The spelling as a string: one array element, or a bare literal that
 * concatenates with its neighbours. */
#define STRING(x) #x ,
#define STRINGNOCOMMA(x) #x

/*
 * Map each short spelling to its logical option id using nothing but
 * enumerator auto-increment: SHORTNEWOPT resets the running value to
 * the option's id, SHORTTHISOPT consumes that value (advancing the
 * counter by one) and records it less one as SHORTVAL_<x>, so every
 * spelling after the same SHORTNEWOPT gets the same id.
 */
#define SHORTNEWOPT(x) SHORTtmp_ ## x = OPT_ ## x,
#define SHORTTHISOPT(x) SHORTtmp2_ ## x, SHORTVAL_ ## x = SHORTtmp2_ ## x - 1,
#define SHORTOPTVAL(x) SHORTVAL_ ## x,
/* One enumerator per short spelling; used only to count them. */
#define SHORTTMP(x) SHORTtmp3_ ## x,

/* The same scheme again for long spellings. */
#define LONGNEWOPT(x) LONGtmp_ ## x = OPT_ ## x,
#define LONGTHISOPT(x) LONGtmp2_ ## x, LONGVAL_ ## x = LONGtmp2_ ## x - 1,
#define LONGOPTVAL(x) LONGVAL_ ## x,
/*
 * One enumerator per long spelling; used only to count them. These
 * get their own LONGtmp3_ prefix so that a long spelling can never
 * clash with a short spelling of the same text.
 */
#define LONGTMP(x) LONGtmp3_ ## x,
384 | |
e9e7a1bf |
385 | #define OPTIONS(NOVAL, VAL, SHORT, LONG) \ |
386 | OPTHELP(NOVAL, VAL, SHORT, LONG, IGNORE, IGNORE, IGNORE, IGNORE) |
387 | |
56fa1896 |
388 | enum { OPTIONS(DEFENUM,DEFENUM,IGNORE,IGNORE) NOPTIONS }; |
389 | enum { OPTIONS(IGNORE,IGNORE,SHORTTMP,IGNORE) NSHORTOPTS }; |
390 | enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS }; |
391 | static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)}; |
392 | static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)}; |
393 | static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)}; |
394 | enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) }; |
395 | enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) }; |
396 | static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)}; |
397 | static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)}; |
398 | |
e9e7a1bf |
399 | static void usage(FILE *fp) |
400 | { |
401 | char longbuf[80]; |
402 | const char *prefix, *shortopt, *longopt, *optarg; |
403 | int i, optex; |
404 | |
405 | #define HELPRESET prefix = shortopt = longopt = optarg = NULL, optex = -1 |
406 | #define HELPNOVAL(s) optex = 0; |
407 | #define HELPVAL(s) optex = 1; |
408 | #define HELPSHORT(s) if (!shortopt) shortopt = "-" #s; |
409 | #define HELPLONG(s) if (!longopt) { \ |
410 | strcpy(longbuf, "--" #s); longopt = longbuf; \ |
411 | for (i = 0; longbuf[i]; i++) if (longbuf[i] == '_') longbuf[i] = '-'; } |
412 | #define HELPPFX(s) prefix = s; |
413 | #define HELPARG(s) optarg = s; |
414 | #define HELPLINE(s) assert(optex == -1); \ |
415 | fprintf(fp, "%7s%c %s\n", prefix?prefix:"", prefix?':':' ', s); \ |
416 | HELPRESET; |
417 | #define HELPOPT(s) assert((optex == 1 && optarg) || (optex == 0 && !optarg)); \ |
418 | assert(shortopt || longopt); \ |
419 | i = fprintf(fp, "%7s%c %s%s%s%s%s", prefix?prefix:"", prefix?':':' ', \ |
420 | shortopt?shortopt:"", shortopt&&longopt?", ":"", longopt?longopt:"", \ |
421 | optarg?" ":"", optarg?optarg:""); \ |
422 | fprintf(fp, "%*s %s\n", i<32?32-i:0,"",s); HELPRESET; |
423 | |
424 | HELPRESET; |
425 | OPTHELP(HELPNOVAL, HELPVAL, HELPSHORT, HELPLONG, |
426 | HELPPFX, HELPARG, HELPLINE, HELPOPT); |
427 | |
428 | #undef HELPRESET |
429 | #undef HELPNOVAL |
430 | #undef HELPVAL |
431 | #undef HELPSHORT |
432 | #undef HELPLONG |
433 | #undef HELPPFX |
434 | #undef HELPARG |
435 | #undef HELPLINE |
436 | #undef HELPOPT |
437 | } |
438 | |
f2e52893 |
439 | static time_t parse_age(time_t now, const char *agestr) |
440 | { |
441 | time_t t; |
442 | struct tm tm; |
443 | int nunits; |
444 | char unit[2]; |
445 | |
446 | t = now; |
447 | |
448 | if (2 != sscanf(agestr, "%d%1[DdWwMmYy]", &nunits, unit)) { |
449 | fprintf(stderr, "%s: age specification should be a number followed by" |
450 | " one of d,w,m,y\n", PNAME); |
451 | exit(1); |
452 | } |
453 | |
454 | if (unit[0] == 'd') { |
455 | t -= 86400 * nunits; |
456 | } else if (unit[0] == 'w') { |
457 | t -= 86400 * 7 * nunits; |
458 | } else { |
459 | int ym; |
460 | |
461 | tm = *localtime(&t); |
462 | ym = tm.tm_year * 12 + tm.tm_mon; |
463 | |
464 | if (unit[0] == 'm') |
465 | ym -= nunits; |
466 | else |
467 | ym -= 12 * nunits; |
468 | |
469 | tm.tm_year = ym / 12; |
470 | tm.tm_mon = ym % 12; |
471 | |
472 | t = mktime(&tm); |
473 | } |
474 | |
475 | return t; |
476 | } |
477 | |
70322ae3 |
478 | int main(int argc, char **argv) |
479 | { |
480 | int fd, count; |
481 | struct ctx actx, *ctx = &actx; |
482 | struct stat st; |
483 | off_t totalsize, realsize; |
484 | void *mappedfile; |
485 | triewalk *tw; |
486 | indexbuild *ib; |
14601b5d |
487 | const struct trie_file *tf, *prevtf; |
bf53e756 |
488 | char *filename = PNAME ".dat"; |
70322ae3 |
489 | int doing_opts = 1; |
355c3af7 |
490 | enum { TEXT, HTML, SCAN, DUMP, SCANDUMP, LOAD, HTTPD, REMOVE }; |
444c684c |
491 | struct action { |
492 | int mode; |
493 | char *arg; |
494 | } *actions = NULL; |
495 | int nactions = 0, actionsize = 0, action; |
f2e52893 |
496 | time_t now = time(NULL); |
497 | time_t textcutoff = now, htmlnewest = now, htmloldest = now; |
498 | int htmlautoagerange = 1; |
1e8d78b9 |
499 | const char *httpserveraddr = NULL; |
500 | int httpserverport = 0; |
501 | const char *httpauthdata = NULL; |
00c5e40c |
502 | const char *outfile = NULL; |
812e4bf2 |
503 | int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; |
8b1f55d6 |
504 | int progress = 1; |
9d0b9596 |
505 | struct inclusion_exclusion *inex = NULL; |
506 | int ninex = 0, inexsize = 0; |
507 | int crossfs = 0; |
00c5e40c |
508 | int depth = -1, gotdepth = 0; |
05b0f827 |
509 | int fakediratimes = 1; |
f59a5d34 |
510 | int mtime = 0; |
16139d21 |
511 | int showfiles = 0; |
70322ae3 |
512 | |
56fa1896 |
513 | #ifdef DEBUG_MAD_OPTION_PARSING_MACROS |
514 | { |
515 | static const char *const optnames[NOPTIONS] = { |
516 | OPTIONS(STRING,STRING,IGNORE,IGNORE) |
517 | }; |
518 | int i; |
519 | for (i = 0; i < NSHORTOPTS; i++) |
520 | printf("-%c == %s [%s]\n", shortopts[i], optnames[shortvals[i]], |
521 | opthasval[shortvals[i]] ? "value" : "no value"); |
522 | for (i = 0; i < NLONGOPTS; i++) |
523 | printf("--%s == %s [%s]\n", longopts[i], optnames[longvals[i]], |
524 | opthasval[longvals[i]] ? "value" : "no value"); |
525 | } |
526 | #endif |
527 | |
70322ae3 |
528 | while (--argc > 0) { |
529 | char *p = *++argv; |
70322ae3 |
530 | |
531 | if (doing_opts && *p == '-') { |
56fa1896 |
532 | int wordstart = 1; |
533 | |
70322ae3 |
534 | if (!strcmp(p, "--")) { |
535 | doing_opts = 0; |
56fa1896 |
536 | continue; |
537 | } |
538 | |
539 | p++; |
540 | while (*p) { |
541 | int optid = -1; |
542 | int i; |
543 | char *optval; |
544 | |
545 | if (wordstart && *p == '-') { |
70322ae3 |
546 | /* |
56fa1896 |
547 | * GNU-style long option. |
70322ae3 |
548 | */ |
56fa1896 |
549 | p++; |
550 | optval = strchr(p, '='); |
551 | if (optval) |
552 | *optval++ = '\0'; |
553 | |
554 | for (i = 0; i < NLONGOPTS; i++) { |
555 | const char *opt = longopts[i], *s = p; |
556 | int match = 1; |
557 | /* |
558 | * The underscores in the option names |
559 | * defined above may be given by the user |
560 | * as underscores or dashes, or omitted |
561 | * entirely. |
562 | */ |
563 | while (*opt) { |
564 | if (*opt == '_') { |
565 | if (*s == '-' || *s == '_') |
566 | s++; |
567 | } else { |
568 | if (*opt != *s) { |
569 | match = 0; |
570 | break; |
571 | } |
572 | s++; |
573 | } |
574 | opt++; |
575 | } |
576 | if (match && !*s) { |
577 | optid = longvals[i]; |
578 | break; |
70322ae3 |
579 | } |
580 | } |
56fa1896 |
581 | |
582 | if (optid < 0) { |
583 | fprintf(stderr, "%s: unrecognised option '--%s'\n", |
584 | PNAME, p); |
585 | return 1; |
586 | } |
587 | |
588 | if (!opthasval[optid]) { |
589 | if (optval) { |
590 | fprintf(stderr, "%s: unexpected argument to option" |
591 | " '--%s'\n", PNAME, p); |
812e4bf2 |
592 | return 1; |
593 | } |
56fa1896 |
594 | } else { |
595 | if (!optval) { |
596 | if (--argc > 0) { |
597 | optval = *++argv; |
598 | } else { |
599 | fprintf(stderr, "%s: option '--%s' expects" |
600 | " an argument\n", PNAME, p); |
601 | return 1; |
602 | } |
9d0b9596 |
603 | } |
70322ae3 |
604 | } |
56fa1896 |
605 | |
606 | p += strlen(p); /* finished with this argument word */ |
70322ae3 |
607 | } else { |
56fa1896 |
608 | /* |
609 | * Short option. |
610 | */ |
70322ae3 |
611 | char c = *p++; |
612 | |
56fa1896 |
613 | for (i = 0; i < NSHORTOPTS; i++) |
614 | if (c == shortopts[i]) { |
615 | optid = shortvals[i]; |
616 | break; |
617 | } |
618 | |
619 | if (optid < 0) { |
620 | fprintf(stderr, "%s: unrecognised option '-%c'\n", |
621 | PNAME, c); |
622 | return 1; |
623 | } |
624 | |
625 | if (opthasval[optid]) { |
70322ae3 |
626 | if (*p) { |
627 | optval = p; |
628 | p += strlen(p); |
629 | } else if (--argc > 0) { |
630 | optval = *++argv; |
631 | } else { |
56fa1896 |
632 | fprintf(stderr, "%s: option '-%c' expects" |
70322ae3 |
633 | " an argument\n", PNAME, c); |
634 | return 1; |
635 | } |
56fa1896 |
636 | } else { |
637 | optval = NULL; |
638 | } |
639 | } |
640 | |
641 | wordstart = 0; |
642 | |
643 | /* |
644 | * Now actually process the option. |
645 | */ |
646 | switch (optid) { |
647 | case OPT_HELP: |
e9e7a1bf |
648 | usage(stdout); |
56fa1896 |
649 | return 0; |
650 | case OPT_VERSION: |
e6fde1f7 |
651 | #ifdef PACKAGE_VERSION |
652 | printf("%s, revision %s\n", PNAME, PACKAGE_VERSION); |
653 | #else |
654 | printf("%s: version number not available when not built" |
655 | " via automake\n", PNAME); |
656 | #endif |
56fa1896 |
657 | return 0; |
658 | case OPT_LICENCE: |
5a29503d |
659 | { |
660 | extern const char *const licence[]; |
661 | int i; |
662 | |
663 | for (i = 0; licence[i]; i++) |
664 | fputs(licence[i], stdout); |
665 | |
666 | return 0; |
667 | } |
56fa1896 |
668 | return 0; |
669 | case OPT_SCAN: |
444c684c |
670 | if (nactions >= actionsize) { |
671 | actionsize = nactions * 3 / 2 + 16; |
672 | actions = sresize(actions, actionsize, struct action); |
673 | } |
674 | actions[nactions].mode = SCAN; |
675 | actions[nactions].arg = optval; |
676 | nactions++; |
56fa1896 |
677 | break; |
84849cbd |
678 | case OPT_SCANDUMP: |
444c684c |
679 | if (nactions >= actionsize) { |
680 | actionsize = nactions * 3 / 2 + 16; |
681 | actions = sresize(actions, actionsize, struct action); |
682 | } |
683 | actions[nactions].mode = SCANDUMP; |
684 | actions[nactions].arg = optval; |
685 | nactions++; |
84849cbd |
686 | break; |
56fa1896 |
687 | case OPT_DUMP: |
444c684c |
688 | if (nactions >= actionsize) { |
689 | actionsize = nactions * 3 / 2 + 16; |
690 | actions = sresize(actions, actionsize, struct action); |
691 | } |
692 | actions[nactions].mode = DUMP; |
693 | actions[nactions].arg = NULL; |
694 | nactions++; |
56fa1896 |
695 | break; |
84849cbd |
696 | case OPT_LOAD: |
444c684c |
697 | if (nactions >= actionsize) { |
698 | actionsize = nactions * 3 / 2 + 16; |
699 | actions = sresize(actions, actionsize, struct action); |
700 | } |
701 | actions[nactions].mode = LOAD; |
702 | actions[nactions].arg = NULL; |
703 | nactions++; |
84849cbd |
704 | break; |
56fa1896 |
705 | case OPT_TEXT: |
444c684c |
706 | if (nactions >= actionsize) { |
707 | actionsize = nactions * 3 / 2 + 16; |
708 | actions = sresize(actions, actionsize, struct action); |
709 | } |
710 | actions[nactions].mode = TEXT; |
711 | actions[nactions].arg = optval; |
712 | nactions++; |
56fa1896 |
713 | break; |
714 | case OPT_HTML: |
444c684c |
715 | if (nactions >= actionsize) { |
716 | actionsize = nactions * 3 / 2 + 16; |
717 | actions = sresize(actions, actionsize, struct action); |
718 | } |
719 | actions[nactions].mode = HTML; |
720 | actions[nactions].arg = optval; |
721 | nactions++; |
56fa1896 |
722 | break; |
723 | case OPT_HTTPD: |
444c684c |
724 | if (nactions >= actionsize) { |
725 | actionsize = nactions * 3 / 2 + 16; |
726 | actions = sresize(actions, actionsize, struct action); |
727 | } |
728 | actions[nactions].mode = HTTPD; |
729 | actions[nactions].arg = NULL; |
730 | nactions++; |
56fa1896 |
731 | break; |
355c3af7 |
732 | case OPT_REMOVE: |
733 | if (nactions >= actionsize) { |
734 | actionsize = nactions * 3 / 2 + 16; |
735 | actions = sresize(actions, actionsize, struct action); |
736 | } |
737 | actions[nactions].mode = REMOVE; |
738 | actions[nactions].arg = NULL; |
739 | nactions++; |
740 | break; |
56fa1896 |
741 | case OPT_PROGRESS: |
742 | progress = 2; |
743 | break; |
744 | case OPT_NOPROGRESS: |
745 | progress = 0; |
746 | break; |
747 | case OPT_TTYPROGRESS: |
748 | progress = 1; |
749 | break; |
750 | case OPT_CROSSFS: |
751 | crossfs = 1; |
752 | break; |
753 | case OPT_NOCROSSFS: |
754 | crossfs = 0; |
755 | break; |
05b0f827 |
756 | case OPT_DIRATIME: |
757 | fakediratimes = 0; |
758 | break; |
759 | case OPT_NODIRATIME: |
760 | fakediratimes = 1; |
761 | break; |
16139d21 |
762 | case OPT_SHOWFILES: |
763 | showfiles = 1; |
764 | break; |
f59a5d34 |
765 | case OPT_MTIME: |
766 | mtime = 1; |
767 | break; |
56fa1896 |
768 | case OPT_DATAFILE: |
769 | filename = optval; |
770 | break; |
00c5e40c |
771 | case OPT_DEPTH: |
772 | if (!strcasecmp(optval, "unlimited") || |
773 | !strcasecmp(optval, "infinity") || |
774 | !strcasecmp(optval, "infinite") || |
775 | !strcasecmp(optval, "inf") || |
776 | !strcasecmp(optval, "maximum") || |
777 | !strcasecmp(optval, "max")) |
778 | depth = -1; |
779 | else |
780 | depth = atoi(optval); |
781 | gotdepth = 1; |
782 | break; |
783 | case OPT_OUTFILE: |
784 | outfile = optval; |
16e591d6 |
785 | break; |
56fa1896 |
786 | case OPT_MINAGE: |
f2e52893 |
787 | textcutoff = parse_age(now, optval); |
788 | break; |
789 | case OPT_AGERANGE: |
790 | if (!strcmp(optval, "auto")) { |
791 | htmlautoagerange = 1; |
792 | } else { |
793 | char *q = optval + strcspn(optval, "-:"); |
794 | if (*q) |
795 | *q++ = '\0'; |
796 | htmloldest = parse_age(now, optval); |
797 | htmlnewest = *q ? parse_age(now, q) : now; |
798 | htmlautoagerange = 0; |
799 | } |
56fa1896 |
800 | break; |
1e8d78b9 |
801 | case OPT_SERVERADDR: |
802 | { |
803 | char *port; |
804 | if (optval[0] == '[' && |
805 | (port = strchr(optval, ']')) != NULL) |
806 | port++; |
807 | else |
808 | port = optval; |
809 | port += strcspn(port, ":"); |
810 | if (port) |
811 | *port++ = '\0'; |
812 | httpserveraddr = optval; |
813 | httpserverport = atoi(port); |
814 | } |
815 | break; |
56fa1896 |
816 | case OPT_AUTH: |
817 | if (!strcmp(optval, "magic")) |
818 | auth = HTTPD_AUTH_MAGIC; |
819 | else if (!strcmp(optval, "basic")) |
820 | auth = HTTPD_AUTH_BASIC; |
821 | else if (!strcmp(optval, "none")) |
822 | auth = HTTPD_AUTH_NONE; |
823 | else if (!strcmp(optval, "default")) |
824 | auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC; |
f2e52893 |
825 | else if (!strcmp(optval, "help") || |
826 | !strcmp(optval, "list")) { |
bf53e756 |
827 | printf(PNAME ": supported HTTP authentication types" |
f2e52893 |
828 | " are:\n" |
829 | " magic use Linux /proc/net/tcp to" |
830 | " determine owner of peer socket\n" |
831 | " basic HTTP Basic username and" |
832 | " password authentication\n" |
833 | " default use 'magic' if possible, " |
834 | " otherwise fall back to 'basic'\n" |
835 | " none unauthenticated HTTP (if" |
836 | " the data file is non-confidential)\n"); |
837 | return 0; |
838 | } else { |
56fa1896 |
839 | fprintf(stderr, "%s: unrecognised authentication" |
840 | " type '%s'\n%*s options are 'magic'," |
841 | " 'basic', 'none', 'default'\n", |
842 | PNAME, optval, (int)strlen(PNAME), ""); |
843 | return 1; |
844 | } |
845 | break; |
1e8d78b9 |
846 | case OPT_AUTHFILE: |
847 | case OPT_AUTHFD: |
848 | { |
849 | int fd; |
850 | char namebuf[40]; |
851 | const char *name; |
852 | char *authbuf; |
853 | int authlen, authsize; |
854 | int ret; |
855 | |
856 | if (optid == OPT_AUTHFILE) { |
857 | fd = open(optval, O_RDONLY); |
858 | if (fd < 0) { |
859 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, |
860 | optval, strerror(errno)); |
861 | return 1; |
862 | } |
863 | name = optval; |
864 | } else { |
865 | fd = atoi(optval); |
866 | name = namebuf; |
867 | sprintf(namebuf, "fd %d", fd); |
868 | } |
869 | |
870 | authlen = 0; |
871 | authsize = 256; |
872 | authbuf = snewn(authsize, char); |
873 | while ((ret = read(fd, authbuf+authlen, |
874 | authsize-authlen)) > 0) { |
875 | authlen += ret; |
876 | if ((authsize - authlen) < (authsize / 16)) { |
877 | authsize = authlen * 3 / 2 + 4096; |
878 | authbuf = sresize(authbuf, authsize, char); |
879 | } |
880 | } |
881 | if (ret < 0) { |
882 | fprintf(stderr, "%s: %s: read: %s\n", PNAME, |
883 | name, strerror(errno)); |
884 | return 1; |
885 | } |
886 | if (optid == OPT_AUTHFILE) |
887 | close(fd); |
888 | httpauthdata = authbuf; |
889 | } |
890 | break; |
56fa1896 |
891 | case OPT_INCLUDE: |
892 | case OPT_INCLUDEPATH: |
893 | case OPT_EXCLUDE: |
894 | case OPT_EXCLUDEPATH: |
0ba55302 |
895 | case OPT_PRUNE: |
896 | case OPT_PRUNEPATH: |
56fa1896 |
897 | if (ninex >= inexsize) { |
898 | inexsize = ninex * 3 / 2 + 16; |
899 | inex = sresize(inex, inexsize, |
900 | struct inclusion_exclusion); |
901 | } |
902 | inex[ninex].path = (optid == OPT_INCLUDEPATH || |
0ba55302 |
903 | optid == OPT_EXCLUDEPATH || |
904 | optid == OPT_PRUNEPATH); |
905 | inex[ninex].type = (optid == OPT_INCLUDE ? 1 : |
906 | optid == OPT_INCLUDEPATH ? 1 : |
907 | optid == OPT_EXCLUDE ? 0 : |
908 | optid == OPT_EXCLUDEPATH ? 0 : |
909 | optid == OPT_PRUNE ? -1 : |
910 | /* optid == OPT_PRUNEPATH ? */ -1); |
56fa1896 |
911 | inex[ninex].wildcard = optval; |
912 | ninex++; |
913 | break; |
914 | } |
915 | } |
70322ae3 |
916 | } else { |
e9e7a1bf |
917 | fprintf(stderr, "%s: unexpected argument '%s'\n", PNAME, p); |
918 | return 1; |
70322ae3 |
919 | } |
920 | } |
921 | |
444c684c |
922 | if (nactions == 0) { |
e9e7a1bf |
923 | usage(stderr); |
924 | return 1; |
444c684c |
925 | } |
926 | |
927 | for (action = 0; action < nactions; action++) { |
928 | int mode = actions[action].mode; |
929 | |
930 | if (mode == SCAN || mode == SCANDUMP || mode == LOAD) { |
931 | const char *scandir = actions[action].arg; |
14601b5d |
932 | |
444c684c |
933 | if (mode == LOAD) { |
934 | char *buf = fgetline(stdin); |
935 | unsigned newpathsep; |
936 | buf[strcspn(buf, "\r\n")] = '\0'; |
bf53e756 |
937 | if (1 != sscanf(buf, DUMPHDR "%x", |
444c684c |
938 | &newpathsep)) { |
939 | fprintf(stderr, "%s: header in dump file not recognised\n", |
940 | PNAME); |
941 | return 1; |
942 | } |
943 | pathsep = (char)newpathsep; |
944 | sfree(buf); |
84849cbd |
945 | } |
70322ae3 |
946 | |
444c684c |
947 | if (mode == SCAN || mode == LOAD) { |
948 | /* |
949 | * Prepare to write out the index file. |
950 | */ |
cc7db507 |
951 | fd = open(filename, O_RDWR | O_TRUNC | O_CREAT, |
952 | S_IRUSR | S_IWUSR); |
444c684c |
953 | if (fd < 0) { |
954 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
955 | strerror(errno)); |
956 | return 1; |
957 | } |
958 | if (fstat(fd, &st) < 0) { |
bf53e756 |
959 | perror(PNAME ": fstat"); |
444c684c |
960 | return 1; |
961 | } |
962 | ctx->datafile_dev = st.st_dev; |
963 | ctx->datafile_ino = st.st_ino; |
964 | ctx->straight_to_dump = 0; |
965 | } else { |
966 | ctx->datafile_dev = -1; |
967 | ctx->datafile_ino = -1; |
968 | ctx->straight_to_dump = 1; |
84849cbd |
969 | } |
444c684c |
970 | |
971 | if (mode == SCAN || mode == SCANDUMP) { |
972 | if (stat(scandir, &st) < 0) { |
973 | fprintf(stderr, "%s: %s: stat: %s\n", PNAME, scandir, |
974 | strerror(errno)); |
975 | return 1; |
976 | } |
977 | ctx->filesystem_dev = crossfs ? 0 : st.st_dev; |
84849cbd |
978 | } |
70322ae3 |
979 | |
444c684c |
980 | ctx->inex = inex; |
981 | ctx->ninex = ninex; |
982 | ctx->crossfs = crossfs; |
05b0f827 |
983 | ctx->fakeatimes = fakediratimes; |
f59a5d34 |
984 | ctx->usemtime = mtime; |
444c684c |
985 | |
986 | ctx->last_output_update = time(NULL); |
987 | |
988 | /* progress==1 means report progress only if stderr is a tty */ |
989 | if (progress == 1) |
990 | progress = isatty(2) ? 2 : 0; |
991 | ctx->progress = progress; |
992 | { |
993 | struct winsize ws; |
9cb5a01c |
994 | if (progress && |
995 | ioctl(2, TIOCGWINSZ, &ws) == 0 && |
996 | ws.ws_col > 0) |
444c684c |
997 | ctx->progwidth = ws.ws_col - 1; |
998 | else |
999 | ctx->progwidth = 79; |
84849cbd |
1000 | } |
84849cbd |
1001 | |
444c684c |
1002 | if (mode == SCANDUMP) |
bf53e756 |
1003 | printf(DUMPHDR "%02x\n", (unsigned char)pathsep); |
8b1f55d6 |
1004 | |
444c684c |
1005 | /* |
1006 | * Scan the directory tree, and write out the trie component |
1007 | * of the data file. |
1008 | */ |
1009 | if (mode != SCANDUMP) { |
1010 | ctx->tb = triebuild_new(fd); |
1011 | } |
1012 | if (mode == LOAD) { |
1013 | char *buf; |
1014 | int line = 2; |
1015 | while ((buf = fgetline(stdin)) != NULL) { |
1016 | struct trie_file tf; |
1017 | char *p, *q; |
1018 | |
1019 | buf[strcspn(buf, "\r\n")] = '\0'; |
1020 | |
1021 | p = buf; |
1022 | q = p; |
1023 | while (*p && *p != ' ') p++; |
1024 | if (!*p) { |
1025 | fprintf(stderr, "%s: dump file line %d: expected at least" |
1026 | " three fields\n", PNAME, line); |
1027 | return 1; |
1028 | } |
1029 | *p++ = '\0'; |
1030 | tf.size = strtoull(q, NULL, 10); |
1031 | q = p; |
1032 | while (*p && *p != ' ') p++; |
1033 | if (!*p) { |
1034 | fprintf(stderr, "%s: dump file line %d: expected at least" |
1035 | " three fields\n", PNAME, line); |
1036 | return 1; |
1037 | } |
1038 | *p++ = '\0'; |
1039 | tf.atime = strtoull(q, NULL, 10); |
1040 | q = buf; |
1041 | while (*p) { |
1042 | int c = *p; |
1043 | if (*p == '%') { |
1044 | int i; |
1045 | p++; |
1046 | c = 0; |
1047 | for (i = 0; i < 2; i++) { |
de693987 |
1048 | c *= 16; |
444c684c |
1049 | if (*p >= '0' && *p <= '9') |
1050 | c += *p - '0'; |
1051 | else if (*p >= 'A' && *p <= 'F') |
1052 | c += *p - ('A' - 10); |
1053 | else if (*p >= 'a' && *p <= 'f') |
1054 | c += *p - ('a' - 10); |
1055 | else { |
1056 | fprintf(stderr, "%s: dump file line %d: unable" |
1057 | " to parse hex escape\n", PNAME, line); |
1058 | } |
1059 | p++; |
1060 | } |
1f651677 |
1061 | } else { |
1062 | p++; |
444c684c |
1063 | } |
1064 | *q++ = c; |
444c684c |
1065 | } |
1066 | *q = '\0'; |
1067 | triebuild_add(ctx->tb, buf, &tf); |
1068 | sfree(buf); |
de693987 |
1069 | line++; |
444c684c |
1070 | } |
1071 | } else { |
09fd7619 |
1072 | du(scandir, gotdata, scan_error, ctx); |
444c684c |
1073 | } |
1074 | if (mode != SCANDUMP) { |
14601b5d |
1075 | size_t maxpathlen; |
522edd92 |
1076 | size_t delta; |
14601b5d |
1077 | char *buf, *prevbuf; |
1078 | |
444c684c |
1079 | count = triebuild_finish(ctx->tb); |
1080 | triebuild_free(ctx->tb); |
84849cbd |
1081 | |
444c684c |
1082 | if (ctx->progress) { |
1083 | fprintf(stderr, "%-*s\r", ctx->progwidth, ""); |
1084 | fflush(stderr); |
1085 | } |
84849cbd |
1086 | |
444c684c |
1087 | /* |
1088 | * Work out how much space the cumulative index trees |
1089 | * will take; enlarge the file, and memory-map it. |
1090 | */ |
1091 | if (fstat(fd, &st) < 0) { |
bf53e756 |
1092 | perror(PNAME ": fstat"); |
444c684c |
1093 | return 1; |
1094 | } |
84849cbd |
1095 | |
522edd92 |
1096 | printf("Built pathname index, %d entries," |
1097 | " %llu bytes of index\n", count, |
50e82fdc |
1098 | (unsigned long long)st.st_size); |
444c684c |
1099 | |
522edd92 |
1100 | totalsize = index_initial_size(st.st_size, count); |
1101 | totalsize += totalsize / 10; |
444c684c |
1102 | |
1103 | if (lseek(fd, totalsize-1, SEEK_SET) < 0) { |
bf53e756 |
1104 | perror(PNAME ": lseek"); |
84849cbd |
1105 | return 1; |
1106 | } |
444c684c |
1107 | if (write(fd, "\0", 1) < 1) { |
bf53e756 |
1108 | perror(PNAME ": write"); |
84849cbd |
1109 | return 1; |
1110 | } |
444c684c |
1111 | |
444c684c |
1112 | mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); |
1113 | if (!mappedfile) { |
bf53e756 |
1114 | perror(PNAME ": mmap"); |
444c684c |
1115 | return 1; |
84849cbd |
1116 | } |
444c684c |
1117 | |
05b0f827 |
1118 | if (fakediratimes) { |
1119 | printf("Faking directory atimes\n"); |
1120 | trie_fake_dir_atimes(mappedfile); |
1121 | } |
1122 | |
1123 | printf("Building index\n"); |
522edd92 |
1124 | ib = indexbuild_new(mappedfile, st.st_size, count, &delta); |
14601b5d |
1125 | maxpathlen = trie_maxpathlen(mappedfile); |
1126 | buf = snewn(maxpathlen, char); |
1127 | prevbuf = snewn(maxpathlen, char); |
444c684c |
1128 | tw = triewalk_new(mappedfile); |
14601b5d |
1129 | prevbuf[0] = '\0'; |
1130 | tf = triewalk_next(tw, buf); |
1131 | assert(tf); |
1132 | while (1) { |
1133 | int i; |
1134 | |
522edd92 |
1135 | if (totalsize - indexbuild_realsize(ib) < delta) { |
645dbd49 |
1136 | const void *oldfile = mappedfile; |
1137 | ptrdiff_t diff; |
1138 | |
522edd92 |
1139 | /* |
1140 | * Unmap the file, grow it, and remap it. |
1141 | */ |
1142 | munmap(mappedfile, totalsize); |
1143 | |
1144 | totalsize += delta; |
1145 | totalsize += totalsize / 10; |
1146 | |
1147 | if (lseek(fd, totalsize-1, SEEK_SET) < 0) { |
1148 | perror(PNAME ": lseek"); |
1149 | return 1; |
1150 | } |
1151 | if (write(fd, "\0", 1) < 1) { |
1152 | perror(PNAME ": write"); |
1153 | return 1; |
1154 | } |
1155 | |
1156 | mappedfile = mmap(NULL, totalsize, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); |
1157 | if (!mappedfile) { |
1158 | perror(PNAME ": mmap"); |
1159 | return 1; |
1160 | } |
1161 | |
1162 | indexbuild_rebase(ib, mappedfile); |
1163 | triewalk_rebase(tw, mappedfile); |
645dbd49 |
1164 | diff = (const unsigned char *)mappedfile - |
1165 | (const unsigned char *)oldfile; |
1166 | if (prevtf) |
1167 | prevtf = (const struct trie_file *) |
1168 | (((const unsigned char *)prevtf) + diff); |
1169 | if (tf) |
1170 | tf = (const struct trie_file *) |
1171 | (((const unsigned char *)tf) + diff); |
522edd92 |
1172 | } |
1173 | |
14601b5d |
1174 | /* |
1175 | * Get the next file from the index. So we are |
1176 | * currently holding, and have not yet |
1177 | * indexed, prevtf (with pathname prevbuf) and |
1178 | * tf (with pathname buf). |
1179 | */ |
1180 | prevtf = tf; |
1181 | memcpy(prevbuf, buf, maxpathlen); |
1182 | tf = triewalk_next(tw, buf); |
1183 | |
1184 | if (!tf) |
1185 | buf[0] = '\0'; |
1186 | |
1187 | /* |
1188 | * Find the first differing character position |
1189 | * between our two pathnames. |
1190 | */ |
1191 | for (i = 0; prevbuf[i] && prevbuf[i] == buf[i]; i++); |
1192 | |
1193 | /* |
1194 | * If prevbuf was a directory name and buf is |
1195 | * something inside that directory, then |
1196 | * trie_before() will be called on prevbuf |
1197 | * itself. Hence we must drop a tag before it, |
1198 | * so that the resulting index is usable. |
1199 | */ |
1200 | if ((!prevbuf[i] && (buf[i] == pathsep || |
1201 | (i > 0 && buf[i-1] == pathsep)))) |
1202 | indexbuild_tag(ib); |
1203 | |
1204 | /* |
1205 | * Add prevtf to the index. |
1206 | */ |
1207 | indexbuild_add(ib, prevtf); |
1208 | |
1209 | if (!tf) { |
1210 | /* |
1211 | * Drop an unconditional final tag, and |
1212 | * get out of this loop. |
1213 | */ |
1214 | indexbuild_tag(ib); |
1215 | break; |
1216 | } |
14601b5d |
1217 | |
1218 | /* |
1219 | * If prevbuf was a filename inside some |
1220 | * directory which buf is outside, then |
1221 | * trie_before() will be called on some |
1222 | * pathname either equal to buf or epsilon |
1223 | * less than it. Either way, we're going to |
1224 | * need to drop a tag after prevtf. |
1225 | */ |
1226 | if (strchr(prevbuf+i, pathsep) || !tf) |
1227 | indexbuild_tag(ib); |
1228 | } |
1229 | |
444c684c |
1230 | triewalk_free(tw); |
1231 | realsize = indexbuild_realsize(ib); |
1232 | indexbuild_free(ib); |
1233 | |
1234 | munmap(mappedfile, totalsize); |
1235 | ftruncate(fd, realsize); |
1236 | close(fd); |
522edd92 |
1237 | printf("Final index file size = %llu bytes\n", |
50e82fdc |
1238 | (unsigned long long)realsize); |
84849cbd |
1239 | } |
444c684c |
1240 | } else if (mode == TEXT) { |
1241 | char *querydir = actions[action].arg; |
1242 | size_t pathlen; |
70322ae3 |
1243 | |
444c684c |
1244 | fd = open(filename, O_RDONLY); |
1245 | if (fd < 0) { |
1246 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
1247 | strerror(errno)); |
1248 | return 1; |
1249 | } |
1250 | if (fstat(fd, &st) < 0) { |
bf53e756 |
1251 | perror(PNAME ": fstat"); |
444c684c |
1252 | return 1; |
1253 | } |
1254 | totalsize = st.st_size; |
1255 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
1256 | if (!mappedfile) { |
bf53e756 |
1257 | perror(PNAME ": mmap"); |
444c684c |
1258 | return 1; |
84849cbd |
1259 | } |
444c684c |
1260 | pathsep = trie_pathsep(mappedfile); |
70322ae3 |
1261 | |
84849cbd |
1262 | /* |
444c684c |
1263 | * Trim trailing slash, just in case. |
84849cbd |
1264 | */ |
444c684c |
1265 | pathlen = strlen(querydir); |
1266 | if (pathlen > 0 && querydir[pathlen-1] == pathsep) |
1267 | querydir[--pathlen] = '\0'; |
1268 | |
00c5e40c |
1269 | if (!gotdepth) |
1270 | depth = 1; /* default for text mode */ |
1271 | if (outfile != NULL) { |
1272 | FILE *fp = fopen(outfile, "w"); |
1273 | if (!fp) { |
1274 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, |
1275 | outfile, strerror(errno)); |
1276 | return 1; |
1277 | } |
1278 | text_query(mappedfile, querydir, textcutoff, showfiles, |
1279 | depth, fp); |
1280 | fclose(fp); |
1281 | } else { |
1282 | text_query(mappedfile, querydir, textcutoff, showfiles, |
1283 | depth, stdout); |
1284 | } |
56cae6e1 |
1285 | |
1286 | munmap(mappedfile, totalsize); |
444c684c |
1287 | } else if (mode == HTML) { |
1288 | char *querydir = actions[action].arg; |
92d3b326 |
1289 | size_t pathlen, maxpathlen; |
1290 | char *pathbuf; |
444c684c |
1291 | struct html_config cfg; |
1292 | unsigned long xi; |
1293 | char *html; |
1294 | |
1295 | fd = open(filename, O_RDONLY); |
1296 | if (fd < 0) { |
1297 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
1298 | strerror(errno)); |
1299 | return 1; |
1300 | } |
84849cbd |
1301 | if (fstat(fd, &st) < 0) { |
bf53e756 |
1302 | perror(PNAME ": fstat"); |
84849cbd |
1303 | return 1; |
1304 | } |
444c684c |
1305 | totalsize = st.st_size; |
1306 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
1307 | if (!mappedfile) { |
bf53e756 |
1308 | perror(PNAME ": mmap"); |
444c684c |
1309 | return 1; |
1310 | } |
1311 | pathsep = trie_pathsep(mappedfile); |
70322ae3 |
1312 | |
92d3b326 |
1313 | maxpathlen = trie_maxpathlen(mappedfile); |
1314 | pathbuf = snewn(maxpathlen, char); |
1315 | |
444c684c |
1316 | /* |
1317 | * Trim trailing slash, just in case. |
1318 | */ |
1319 | pathlen = strlen(querydir); |
1320 | if (pathlen > 0 && querydir[pathlen-1] == pathsep) |
1321 | querydir[--pathlen] = '\0'; |
1322 | |
1323 | xi = trie_before(mappedfile, querydir); |
92d3b326 |
1324 | if (xi >= trie_count(mappedfile) || |
1325 | (trie_getpath(mappedfile, xi, pathbuf), |
1326 | strcmp(pathbuf, querydir))) { |
1327 | fprintf(stderr, "%s: pathname '%s' does not exist in index\n" |
1328 | "%*s(check it is spelled exactly as it is in the " |
1329 | "index, including\n%*sany leading './')\n", |
1330 | PNAME, querydir, |
1331 | (int)(1+sizeof(PNAME)), "", |
1332 | (int)(1+sizeof(PNAME)), ""); |
1333 | } else if (!index_has_root(mappedfile, xi)) { |
1334 | fprintf(stderr, "%s: pathname '%s' is" |
1335 | " a file, not a directory\n", PNAME, querydir); |
00c5e40c |
1336 | } else if (!gotdepth) { |
1337 | /* |
1338 | * Single output file. |
1339 | */ |
92d3b326 |
1340 | cfg.format = NULL; |
00c5e40c |
1341 | cfg.rootpage = NULL; |
92d3b326 |
1342 | cfg.autoage = htmlautoagerange; |
1343 | cfg.oldest = htmloldest; |
1344 | cfg.newest = htmlnewest; |
16139d21 |
1345 | cfg.showfiles = showfiles; |
00c5e40c |
1346 | html = html_query(mappedfile, xi, &cfg, 0); |
1347 | if (outfile != NULL) { |
1348 | FILE *fp = fopen(outfile, "w"); |
1349 | if (!fp) { |
1350 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, |
1351 | outfile, strerror(errno)); |
1352 | return 1; |
1353 | } else if (fputs(html, fp) < 0) { |
1354 | fprintf(stderr, "%s: %s: write: %s\n", PNAME, |
1355 | outfile, strerror(errno)); |
1356 | fclose(fp); |
1357 | return 1; |
1358 | } else if (fclose(fp) < 0) { |
1359 | fprintf(stderr, "%s: %s: fclose: %s\n", PNAME, |
1360 | outfile, strerror(errno)); |
1361 | return 1; |
1362 | } |
1363 | } else { |
1364 | fputs(html, stdout); |
1365 | } |
1366 | } else { |
1367 | /* |
1368 | * Multiple output files. |
1369 | */ |
1370 | int dirlen = outfile ? 2+strlen(outfile) : 3; |
1371 | char prefix[dirlen]; |
1372 | if (outfile) |
1373 | snprintf(prefix, dirlen, "%s/", outfile); |
1374 | else |
1375 | snprintf(prefix, dirlen, "./"); |
1376 | |
1377 | unsigned long xi2; |
1378 | make_successor(pathbuf); |
1379 | xi2 = trie_before(mappedfile, pathbuf); |
1380 | |
1381 | cfg.format = "%lu.html"; |
1382 | cfg.rootpage = "index.html"; |
1383 | cfg.autoage = htmlautoagerange; |
1384 | cfg.oldest = htmloldest; |
1385 | cfg.newest = htmlnewest; |
1386 | cfg.showfiles = showfiles; |
1387 | if (html_dump(mappedfile, xi, xi2, depth, &cfg, prefix)) |
1388 | return 1; |
92d3b326 |
1389 | } |
56cae6e1 |
1390 | |
1391 | munmap(mappedfile, totalsize); |
92d3b326 |
1392 | sfree(pathbuf); |
444c684c |
1393 | } else if (mode == DUMP) { |
1394 | size_t maxpathlen; |
1395 | char *buf; |
70322ae3 |
1396 | |
444c684c |
1397 | fd = open(filename, O_RDONLY); |
1398 | if (fd < 0) { |
1399 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
1400 | strerror(errno)); |
84849cbd |
1401 | return 1; |
1402 | } |
444c684c |
1403 | if (fstat(fd, &st) < 0) { |
bf53e756 |
1404 | perror(PNAME ": fstat"); |
84849cbd |
1405 | return 1; |
1406 | } |
444c684c |
1407 | totalsize = st.st_size; |
1408 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
84849cbd |
1409 | if (!mappedfile) { |
bf53e756 |
1410 | perror(PNAME ": mmap"); |
84849cbd |
1411 | return 1; |
1412 | } |
444c684c |
1413 | pathsep = trie_pathsep(mappedfile); |
1414 | |
1415 | maxpathlen = trie_maxpathlen(mappedfile); |
1416 | buf = snewn(maxpathlen, char); |
84849cbd |
1417 | |
bf53e756 |
1418 | printf(DUMPHDR "%02x\n", (unsigned char)pathsep); |
84849cbd |
1419 | tw = triewalk_new(mappedfile); |
444c684c |
1420 | while ((tf = triewalk_next(tw, buf)) != NULL) |
1421 | dump_line(buf, tf); |
84849cbd |
1422 | triewalk_free(tw); |
56cae6e1 |
1423 | |
1424 | munmap(mappedfile, totalsize); |
444c684c |
1425 | } else if (mode == HTTPD) { |
1426 | struct html_config pcfg; |
1427 | struct httpd_config dcfg; |
70322ae3 |
1428 | |
444c684c |
1429 | fd = open(filename, O_RDONLY); |
1430 | if (fd < 0) { |
1431 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename, |
1432 | strerror(errno)); |
1433 | return 1; |
1434 | } |
1435 | if (fstat(fd, &st) < 0) { |
bf53e756 |
1436 | perror(PNAME ": fstat"); |
444c684c |
1437 | return 1; |
1438 | } |
1439 | totalsize = st.st_size; |
1440 | mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0); |
1441 | if (!mappedfile) { |
bf53e756 |
1442 | perror(PNAME ": mmap"); |
444c684c |
1443 | return 1; |
1444 | } |
1445 | pathsep = trie_pathsep(mappedfile); |
1446 | |
1447 | dcfg.address = httpserveraddr; |
1448 | dcfg.port = httpserverport; |
1449 | dcfg.basicauthdata = httpauthdata; |
1450 | pcfg.format = NULL; |
00c5e40c |
1451 | pcfg.rootpage = NULL; |
444c684c |
1452 | pcfg.autoage = htmlautoagerange; |
1453 | pcfg.oldest = htmloldest; |
1454 | pcfg.newest = htmlnewest; |
16139d21 |
1455 | pcfg.showfiles = showfiles; |
444c684c |
1456 | run_httpd(mappedfile, auth, &dcfg, &pcfg); |
56cae6e1 |
1457 | munmap(mappedfile, totalsize); |
355c3af7 |
1458 | } else if (mode == REMOVE) { |
1459 | if (remove(filename) < 0) { |
1460 | fprintf(stderr, "%s: %s: remove: %s\n", PNAME, filename, |
1461 | strerror(errno)); |
1462 | return 1; |
1463 | } |
70322ae3 |
1464 | } |
70322ae3 |
1465 | } |
1466 | |
1467 | return 0; |
1468 | } |