70322ae3 |
1 | /* |
2 | * html.c: implementation of html.h. |
3 | */ |
4 | |
353bc75d |
5 | #include "agedu.h" |
70322ae3 |
6 | #include "html.h" |
995db599 |
7 | #include "alloc.h" |
70322ae3 |
8 | #include "trie.h" |
9 | #include "index.h" |
10 | |
70322ae3 |
11 | #define MAXCOLOUR 511 |
12 | |
13 | struct html { |
14 | char *buf; |
15 | size_t buflen, bufsize; |
16 | const void *t; |
17 | unsigned long long totalsize, oldest, newest; |
18 | char *path2; |
c47f39de |
19 | char *oururi; |
70322ae3 |
20 | size_t hreflen; |
c47f39de |
21 | const char *uriformat; |
3f940260 |
22 | unsigned long long thresholds[MAXCOLOUR]; |
23 | char *titletexts[MAXCOLOUR+1]; |
70322ae3 |
24 | time_t now; |
25 | }; |
26 | |
afe761f3 |
27 | static void vhtprintf(struct html *ctx, const char *fmt, va_list ap) |
70322ae3 |
28 | { |
29 | va_list ap2; |
30 | int size, size2; |
50e82fdc |
31 | char testbuf[2]; |
70322ae3 |
32 | |
33 | va_copy(ap2, ap); |
50e82fdc |
34 | /* |
35 | * Some C libraries (Solaris, I'm looking at you) don't like |
36 | * an output buffer size of zero in vsnprintf, but will return |
37 | * sensible values given any non-zero buffer size. Hence, we |
38 | * use testbuf to gauge the length of the string. |
39 | */ |
40 | size = vsnprintf(testbuf, 1, fmt, ap2); |
70322ae3 |
41 | va_end(ap2); |
42 | |
43 | if (ctx->buflen + size >= ctx->bufsize) { |
44 | ctx->bufsize = (ctx->buflen + size) * 3 / 2 + 1024; |
45 | ctx->buf = sresize(ctx->buf, ctx->bufsize, char); |
46 | } |
47 | size2 = vsnprintf(ctx->buf + ctx->buflen, ctx->bufsize - ctx->buflen, |
48 | fmt, ap); |
49 | assert(size == size2); |
50 | ctx->buflen += size; |
51 | } |
52 | |
afe761f3 |
/*
 * Variadic front end to vhtprintf(): append formatted text to the
 * output buffer in ctx.
 */
static void htprintf(struct html *ctx, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vhtprintf(ctx, fmt, args);
    va_end(args);
}
60 | |
61 | static unsigned long long round_and_format_age(struct html *ctx, |
62 | unsigned long long age, |
63 | char *buf, int direction) |
64 | { |
65 | struct tm tm, tm2; |
66 | char newbuf[80]; |
67 | unsigned long long ret, newret; |
68 | int i; |
69 | int ym; |
70 | static const int minutes[] = { 5, 10, 15, 30, 45 }; |
71 | |
72 | tm = *localtime(&ctx->now); |
73 | ym = tm.tm_year * 12 + tm.tm_mon; |
74 | |
75 | ret = ctx->now; |
76 | strcpy(buf, "Now"); |
77 | |
78 | for (i = 0; i < lenof(minutes); i++) { |
79 | newret = ctx->now - minutes[i] * 60; |
80 | sprintf(newbuf, "%d minutes", minutes[i]); |
81 | if (newret < age) |
82 | goto finish; |
83 | strcpy(buf, newbuf); |
84 | ret = newret; |
85 | } |
86 | |
87 | for (i = 1; i < 24; i++) { |
88 | newret = ctx->now - i * (60*60); |
89 | sprintf(newbuf, "%d hour%s", i, i==1 ? "" : "s"); |
90 | if (newret < age) |
91 | goto finish; |
92 | strcpy(buf, newbuf); |
93 | ret = newret; |
94 | } |
95 | |
96 | for (i = 1; i < 7; i++) { |
97 | newret = ctx->now - i * (24*60*60); |
98 | sprintf(newbuf, "%d day%s", i, i==1 ? "" : "s"); |
99 | if (newret < age) |
100 | goto finish; |
101 | strcpy(buf, newbuf); |
102 | ret = newret; |
103 | } |
104 | |
105 | for (i = 1; i < 4; i++) { |
106 | newret = ctx->now - i * (7*24*60*60); |
107 | sprintf(newbuf, "%d week%s", i, i==1 ? "" : "s"); |
108 | if (newret < age) |
109 | goto finish; |
110 | strcpy(buf, newbuf); |
111 | ret = newret; |
112 | } |
113 | |
114 | for (i = 1; i < 11; i++) { |
115 | tm2 = tm; /* structure copy */ |
116 | tm2.tm_year = (ym - i) / 12; |
117 | tm2.tm_mon = (ym - i) % 12; |
118 | newret = mktime(&tm2); |
119 | sprintf(newbuf, "%d month%s", i, i==1 ? "" : "s"); |
120 | if (newret < age) |
121 | goto finish; |
122 | strcpy(buf, newbuf); |
123 | ret = newret; |
124 | } |
125 | |
126 | for (i = 1;; i++) { |
127 | tm2 = tm; /* structure copy */ |
128 | tm2.tm_year = (ym - i*12) / 12; |
129 | tm2.tm_mon = (ym - i*12) % 12; |
130 | newret = mktime(&tm2); |
131 | sprintf(newbuf, "%d year%s", i, i==1 ? "" : "s"); |
132 | if (newret < age) |
133 | goto finish; |
134 | strcpy(buf, newbuf); |
135 | ret = newret; |
136 | } |
137 | |
138 | finish: |
139 | if (direction > 0) { |
140 | /* |
141 | * Round toward newest, i.e. use the existing (buf,ret). |
142 | */ |
143 | } else if (direction < 0) { |
144 | /* |
145 | * Round toward oldest, i.e. use (newbuf,newret); |
146 | */ |
147 | strcpy(buf, newbuf); |
148 | ret = newret; |
149 | } else { |
150 | /* |
151 | * Round to nearest. |
152 | */ |
153 | if (ret - age > age - newret) { |
154 | strcpy(buf, newbuf); |
155 | ret = newret; |
156 | } |
157 | } |
158 | return ret; |
159 | } |
160 | |
/*
 * Look up the pair of trie positions bracketing 'path': *xi1 receives
 * trie_before() of the path itself and *xi2 trie_before() of its
 * successor as produced by make_successor().
 *
 * 'path' is modified temporarily and restored before returning.
 */
static void get_indices(const void *t, char *path,
                        unsigned long *xi1, unsigned long *xi2)
{
    size_t n = strlen(path);
    /* Remember the two bytes that get put back after make_successor(). */
    int saved_end = path[n];
    int saved_last = 0;

    if (n > 0)
        saved_last = path[n-1];

    *xi1 = trie_before(t, path);

    make_successor(path);
    *xi2 = trie_before(t, path);

    path[n] = saved_end;
    if (n > 0)
        path[n-1] = saved_last;
}
174 | |
3f940260 |
175 | static unsigned long long fetch_size(const void *t, |
176 | unsigned long xi1, unsigned long xi2, |
70322ae3 |
177 | unsigned long long atime) |
178 | { |
16139d21 |
179 | if (xi2 - xi1 == 1) { |
180 | /* |
181 | * We are querying an individual file, so we should not |
182 | * depend on the index entries either side of the node, |
183 | * since they almost certainly don't both exist. Instead, |
184 | * just look up the file's size and atime in the main trie. |
185 | */ |
186 | const struct trie_file *f = trie_getfile(t, xi1); |
187 | if (f->atime < atime) |
188 | return f->size; |
189 | else |
190 | return 0; |
191 | } else { |
192 | return index_query(t, xi2, atime) - index_query(t, xi1, atime); |
193 | } |
70322ae3 |
194 | } |
195 | |
/*
 * Append at most 'n' bytes of the string 's' to the output, escaped
 * for inclusion in HTML text. Stops early at a NUL terminator.
 *
 * '&', '<' and '>' are written as the corresponding character
 * entities so that they cannot be interpreted as markup. Bytes
 * outside the printable ASCII range are written as "[xx]" (two hex
 * digits), wrapped in <i>...</i> if 'italics' is nonzero.
 */
static void htescape(struct html *ctx, const char *s, int n, int italics)
{
    while (n > 0 && *s) {
        unsigned char c = (unsigned char)*s++;

        if (c == '&')
            htprintf(ctx, "&amp;");
        else if (c == '<')
            htprintf(ctx, "&lt;");
        else if (c == '>')
            htprintf(ctx, "&gt;");
        else if (c >= ' ' && c < '\177')
            htprintf(ctx, "%c", c);
        else {
            if (italics) htprintf(ctx, "<i>");
            htprintf(ctx, "[%02x]", c);
            if (italics) htprintf(ctx, "</i>");
        }

        n--;
    }
}
218 | |
/*
 * Emit the opening markup of a colour bar: a borderless table holding
 * a single row, to which add_to_colour_bar() appends cells.
 */
static void begin_colour_bar(struct html *ctx)
{
    htprintf(ctx,
             "<table cellspacing=0 cellpadding=0 style=\"border:0\">\n"
             "<tr>\n");
}
224 | |
225 | static void add_to_colour_bar(struct html *ctx, int colour, int pixels) |
226 | { |
227 | int r, g, b; |
70322ae3 |
228 | |
229 | if (colour >= 0 && colour < 256) /* red -> yellow fade */ |
230 | r = 255, g = colour, b = 0; |
231 | else if (colour >= 256 && colour <= 511) /* yellow -> green fade */ |
232 | r = 511 - colour, g = 255, b = 0; |
233 | else /* background grey */ |
234 | r = g = b = 240; |
235 | |
70322ae3 |
236 | if (pixels > 0) { |
237 | htprintf(ctx, "<td style=\"width:%dpx; height:1em; " |
238 | "background-color:#%02x%02x%02x\"", |
239 | pixels, r, g, b); |
240 | if (colour >= 0) |
3f940260 |
241 | htprintf(ctx, " title=\"%s\"", ctx->titletexts[colour]); |
70322ae3 |
242 | htprintf(ctx, "></td>\n"); |
243 | } |
244 | } |
245 | |
/*
 * Emit the closing markup matching begin_colour_bar().
 */
static void end_colour_bar(struct html *ctx)
{
    htprintf(ctx,
             "</tr>\n"
             "</table>\n");
}
250 | |
251 | struct vector { |
16139d21 |
252 | int want_href, essential; |
70322ae3 |
253 | char *name; |
b49db535 |
254 | int literal; /* should the name be formatted in fixed-pitch? */ |
70322ae3 |
255 | unsigned long index; |
256 | unsigned long long sizes[MAXCOLOUR+1]; |
257 | }; |
258 | |
259 | int vec_compare(const void *av, const void *bv) |
260 | { |
261 | const struct vector *a = *(const struct vector **)av; |
262 | const struct vector *b = *(const struct vector **)bv; |
263 | |
264 | if (a->sizes[MAXCOLOUR] > b->sizes[MAXCOLOUR]) |
265 | return -1; |
266 | else if (a->sizes[MAXCOLOUR] < b->sizes[MAXCOLOUR]) |
267 | return +1; |
268 | else if (a->want_href < b->want_href) |
269 | return +1; |
270 | else if (a->want_href > b->want_href) |
271 | return -1; |
272 | else if (a->want_href) |
273 | return strcmp(a->name, b->name); |
274 | else if (a->index < b->index) |
275 | return -1; |
276 | else if (a->index > b->index) |
277 | return +1; |
16139d21 |
278 | else if (a->essential < b->essential) |
279 | return +1; |
280 | else if (a->essential > b->essential) |
281 | return -1; |
70322ae3 |
282 | return 0; |
283 | } |
284 | |
285 | static struct vector *make_vector(struct html *ctx, char *path, |
16139d21 |
286 | int want_href, int essential, |
287 | char *name, int literal) |
70322ae3 |
288 | { |
289 | unsigned long xi1, xi2; |
290 | struct vector *vec = snew(struct vector); |
291 | int i; |
292 | |
293 | vec->want_href = want_href; |
16139d21 |
294 | vec->essential = essential; |
70322ae3 |
295 | vec->name = name ? dupstr(name) : NULL; |
b49db535 |
296 | vec->literal = literal; |
70322ae3 |
297 | |
298 | get_indices(ctx->t, path, &xi1, &xi2); |
299 | |
300 | vec->index = xi1; |
301 | |
302 | for (i = 0; i <= MAXCOLOUR; i++) { |
303 | unsigned long long atime; |
304 | if (i == MAXCOLOUR) |
305 | atime = ULLONG_MAX; |
306 | else |
307 | atime = ctx->thresholds[i]; |
3f940260 |
308 | vec->sizes[i] = fetch_size(ctx->t, xi1, xi2, atime); |
70322ae3 |
309 | } |
310 | |
311 | return vec; |
312 | } |
313 | |
/*
 * Emit a full-width, centred heading row in the main report table.
 * 'title' is inserted as-is, without HTML escaping.
 */
static void print_heading(struct html *ctx, const char *title)
{
    htprintf(ctx,
             "<tr style=\"padding: 0.2em; background-color:#e0e0e0\">\n"
             "<td colspan=4 align=center>%s</td>\n"
             "</tr>\n",
             title);
}
319 | |
afe761f3 |
/*
 * Choose a human-readable unit for 'size'. On return *display_size
 * holds the size divided down to that unit, and *fmt a printf format
 * (taking one double) which prints it with the unit suffix.
 */
static void compute_display_size(unsigned long long size,
                                 const char **fmt, double *display_size)
{
    static const char *const fmts[] = {
        "%g b", "%g Kb", "%#.1f Mb", "%#.1f Gb", "%#.1f Tb",
        "%#.1f Pb", "%#.1f Eb", "%#.1f Zb", "%#.1f Yb"
    };
    unsigned long long rest;
    double scale = 1.0;
    int unit = 0;

    /* Step up one unit for each factor of 1024 available. */
    for (rest = size; rest >= 1024 && unit < lenof(fmts)-1; rest >>= 10) {
        scale *= 1024.0;
        unit++;
    }

    *display_size = size / scale;
    *fmt = fmts[unit];
}
341 | |
c47f39de |
/*
 * One alternative from a URI format string. A format string is a
 * sequence of such alternatives separated by "%|"; each consists of
 * literal prefix text, an optional directive ("%n" or "%p", possibly
 * with '/' and '-' flags in between), and literal suffix text. "%%"
 * stands for a literal '%'.
 */
struct format_option {
    const char *prefix, *suffix;       /* may include '%%' */
    int prefixlen, suffixlen;          /* does not count '%%' */
    char fmttype;                      /* 0 for none, or 'n' or 'p' */
    int translate_pathsep;             /* pathsep rendered as '/'? */
    int shorten_path;                  /* omit common prefix? */
};

/*
 * Gets the next format option from a format string. Advances '*fmt'
 * past it, or sets it to NULL if nothing is left.
 *
 * Every field of the returned structure is initialised, including
 * the two flags when the option has no directive (they then keep
 * their default value of 1). A malformed directive or an unexpected
 * '%' sequence in the suffix is caught by assertion.
 */
struct format_option get_format_option(const char **fmt)
{
    struct format_option ret;

    /*
     * Start from a fully defined structure, so that no return path
     * hands back indeterminate fields.
     */
    ret.prefix = *fmt;
    ret.prefixlen = 0;
    ret.suffix = NULL;
    ret.suffixlen = 0;
    ret.fmttype = '\0';
    ret.translate_pathsep = ret.shorten_path = 1;

    /*
     * Scan for prefix of format.
     */
    while (1) {
        if (**fmt == '\0') {
            /*
             * No formatting directive, and this is the last option.
             */
            ret.suffix = *fmt;
            *fmt = NULL;
            return ret;
        } else if (**fmt == '%') {
            if ((*fmt)[1] == '%') {
                (*fmt) += 2;           /* just advance one extra */
                ret.prefixlen++;
            } else if ((*fmt)[1] == '|') {
                /*
                 * No formatting directive.
                 */
                ret.suffix = *fmt;
                (*fmt) += 2;           /* advance to start of next option */
                return ret;
            } else {
                break;
            }
        } else {
            (*fmt)++;                  /* normal character */
            ret.prefixlen++;
        }
    }

    /*
     * Interpret formatting directive with flags.
     */
    (*fmt)++;
    while (1) {
        char c = *(*fmt)++;
        assert(c);
        if (c == '/') {
            ret.translate_pathsep = 0;
        } else if (c == '-') {
            ret.shorten_path = 0;
        } else {
            assert(c == 'n' || c == 'p');
            ret.fmttype = c;
            break;
        }
    }

    /*
     * Scan for suffix.
     */
    ret.suffix = *fmt;
    while (1) {
        if (**fmt == '\0') {
            /*
             * This is the last option.
             */
            *fmt = NULL;
            return ret;
        } else if (**fmt != '%') {
            (*fmt)++;                  /* normal character */
            ret.suffixlen++;
        } else {
            if ((*fmt)[1] == '%') {
                (*fmt) += 2;           /* just advance one extra */
                ret.suffixlen++;
            } else {
                assert((*fmt)[1] == '|');
                (*fmt) += 2;           /* advance to start of next option */
                return ret;
            }
        }
    }
}
441 | |
c113ed52 |
442 | char *format_string_inner(const char *fmt, int nescape, |
443 | unsigned long index, const void *t) |
00c5e40c |
444 | { |
c47f39de |
445 | int maxlen; |
446 | char *ret = NULL, *p = NULL; |
447 | char *path = NULL, *q = NULL; |
448 | char pathsep = trie_pathsep(t); |
449 | int maxpathlen = trie_maxpathlen(t); |
c113ed52 |
450 | int charindex; |
c47f39de |
451 | |
452 | while (fmt) { |
453 | struct format_option opt = get_format_option(&fmt); |
454 | if (index && !opt.fmttype) |
455 | continue; /* option is only good for the root, which this isn't */ |
456 | |
457 | maxlen = opt.prefixlen + opt.suffixlen + 1; |
458 | switch (opt.fmttype) { |
459 | case 'n': |
460 | maxlen += 40; /* generous length for an integer */ |
461 | break; |
462 | case 'p': |
463 | maxlen += 3*maxpathlen; /* might have to escape everything */ |
464 | break; |
465 | } |
466 | ret = snewn(maxlen, char); |
467 | p = ret; |
468 | while (opt.prefixlen-- > 0) { |
469 | if ((*p++ = *opt.prefix++) == '%') |
470 | opt.prefix++; |
471 | } |
472 | switch (opt.fmttype) { |
473 | case 'n': |
474 | p += sprintf(p, "%lu", index); |
475 | break; |
476 | case 'p': |
477 | path = snewn(1+trie_maxpathlen(t), char); |
478 | if (opt.shorten_path) { |
479 | trie_getpath(t, 0, path); |
480 | q = path + strlen(path); |
481 | trie_getpath(t, index, path); |
482 | if (*q == pathsep) |
483 | q++; |
484 | } else { |
485 | trie_getpath(t, index, path); |
486 | q = path; |
487 | } |
c113ed52 |
488 | charindex = 0; |
c47f39de |
489 | while (*q) { |
490 | char c = *q++; |
1d3a7ff6 |
491 | if (c == pathsep && opt.translate_pathsep) { |
c47f39de |
492 | *p++ = '/'; |
c113ed52 |
493 | charindex = 0; |
494 | } else if (charindex < nescape || |
495 | (!isalnum((unsigned char)c) && |
496 | ((charindex == 0 && c=='.') || |
497 | !strchr("-.@_", c)))) { |
c47f39de |
498 | p += sprintf(p, "=%02X", (unsigned char)c); |
c113ed52 |
499 | charindex++; |
1d3a7ff6 |
500 | } else { |
c47f39de |
501 | *p++ = c; |
c113ed52 |
502 | charindex++; |
1d3a7ff6 |
503 | } |
c47f39de |
504 | } |
505 | sfree(path); |
506 | break; |
507 | } |
508 | while (opt.suffixlen-- > 0) { |
509 | if ((*p++ = *opt.suffix++) == '%') |
510 | opt.suffix++; |
511 | } |
512 | *p = '\0'; |
513 | assert(p - ret < maxlen); |
514 | return ret; |
515 | } |
516 | assert(!"Getting here implies an incomplete set of formats"); |
517 | } |
518 | |
c113ed52 |
519 | int parse_path(const void *t, const char *path, |
520 | const char *fmt, unsigned long *index) |
c47f39de |
521 | { |
522 | int len = strlen(path); |
523 | int midlen; |
524 | const char *p, *q; |
525 | char *r; |
526 | char pathsep = trie_pathsep(t); |
c47f39de |
527 | |
528 | while (fmt) { |
529 | struct format_option opt = get_format_option(&fmt); |
530 | |
531 | /* |
532 | * Check prefix and suffix. |
533 | */ |
534 | midlen = len - opt.prefixlen - opt.suffixlen; |
535 | if (midlen < 0) |
536 | continue; /* prefix and suffix don't even fit */ |
537 | |
538 | p = path; |
539 | while (opt.prefixlen > 0) { |
540 | char c = *opt.prefix++; |
541 | if (c == '%') |
542 | opt.prefix++; |
543 | if (*p != c) |
544 | break; |
545 | p++; |
546 | opt.prefixlen--; |
547 | } |
548 | if (opt.prefixlen > 0) |
549 | continue; /* prefix didn't match */ |
550 | |
551 | q = path + len - opt.suffixlen; |
552 | while (opt.suffixlen > 0) { |
553 | char c = *opt.suffix++; |
554 | if (c == '%') |
555 | opt.suffix++; |
556 | if (*q != c) |
557 | break; |
558 | q++; |
559 | opt.suffixlen--; |
560 | } |
561 | if (opt.suffixlen > 0) |
562 | continue; /* suffix didn't match */ |
563 | |
564 | /* |
565 | * Check the data in between. p points at it, and it's midlen |
566 | * characters long. |
567 | */ |
568 | if (opt.fmttype == '\0') { |
569 | if (midlen == 0) { |
570 | /* |
571 | * Successful match against a root format. |
572 | */ |
573 | *index = 0; |
574 | return 1; |
575 | } |
576 | } else if (opt.fmttype == 'n') { |
577 | *index = 0; |
578 | while (midlen > 0) { |
579 | if (*p >= '0' && *p <= '9') |
580 | *index = *index * 10 + (*p - '0'); |
581 | else |
582 | break; |
583 | midlen--; |
584 | p++; |
585 | } |
586 | if (midlen == 0) { |
587 | /* |
588 | * Successful match against a numeric format. |
589 | */ |
590 | return 1; |
591 | } |
592 | } else { |
593 | assert(opt.fmttype == 'p'); |
594 | |
595 | int maxoutlen = trie_maxpathlen(t) + 1; |
596 | int maxinlen = midlen + 1; |
597 | char triepath[maxinlen+maxoutlen]; |
598 | |
599 | if (opt.shorten_path) { |
600 | trie_getpath(t, 0, triepath); |
601 | r = triepath + strlen(triepath); |
602 | if (r > triepath && r[-1] != pathsep) |
603 | *r++ = pathsep; |
604 | } else { |
605 | r = triepath; |
606 | } |
607 | |
608 | while (midlen > 0) { |
609 | if (*p == '/' && opt.translate_pathsep) { |
610 | *r++ = pathsep; |
611 | p++; |
612 | midlen--; |
613 | } else if (*p == '=') { |
1d3a7ff6 |
614 | /* |
615 | * We intentionally do not check whether the |
616 | * escaped character _should_ have been escaped |
617 | * according to the rules in html_format_path. |
618 | * |
619 | * All clients of this parsing function, after a |
620 | * successful parse, call html_format_path to find |
621 | * the canonical URI for the same index and return |
622 | * an HTTP redirect if the provided URI was not |
623 | * exactly equal to that canonical form. This is |
624 | * critical when the correction involves adding or |
625 | * removing a trailing slash (because then |
626 | * relative hrefs on the generated page can be |
627 | * computed with respect to the canonical URI |
628 | * instead of having to remember what the actual |
629 | * URI was), but also has the useful effect that |
630 | * if a user attempts to type in (guess) a URI by |
631 | * hand they don't have to remember the escaping |
632 | * rules - as long as they type _something_ that |
633 | * this code can parse into a recognisable |
634 | * pathname, it will be automatically 301ed into |
635 | * the canonical form. |
636 | */ |
c47f39de |
637 | if (midlen < 3 || |
638 | !isxdigit((unsigned char)p[1]) || |
639 | !isxdigit((unsigned char)p[2])) |
640 | break; /* faulty escape encoding */ |
641 | char x[3]; |
642 | unsigned cval; |
643 | x[0] = p[1]; |
644 | x[1] = p[2]; |
645 | x[2] = '\0'; |
646 | sscanf(x, "%x", &cval); |
647 | *r++ = cval; |
648 | p += 3; |
649 | midlen -= 3; |
650 | } else { |
651 | *r++ = *p; |
652 | p++; |
653 | midlen--; |
654 | } |
655 | } |
656 | if (midlen > 0) |
657 | continue; /* something went wrong in that loop */ |
658 | assert(r - triepath < maxinlen+maxoutlen); |
659 | *r = '\0'; |
660 | |
661 | unsigned long gotidx = trie_before(t, triepath); |
662 | if (gotidx >= trie_count(t)) |
663 | continue; /* index out of range */ |
664 | char retpath[1+maxoutlen]; |
665 | trie_getpath(t, gotidx, retpath); |
666 | if (strcmp(triepath, retpath)) |
667 | continue; /* exact path not found in trie */ |
668 | if (!index_has_root(t, gotidx)) |
669 | continue; /* path is not a directory */ |
670 | |
671 | /* |
672 | * Successful path-based match. |
673 | */ |
674 | *index = gotidx; |
675 | return 1; |
676 | } |
677 | } |
678 | |
679 | return 0; /* no match from any format option */ |
680 | } |
681 | |
c113ed52 |
/*
 * Format trie index 'index' as a URI according to 'fmt', making sure
 * that the result parses back to the same index.
 *
 * Returns a newly allocated string, which the caller frees with
 * sfree().
 */
char *format_string(const char *fmt, unsigned long index, const void *t)
{
    unsigned long indexout;
    char *ret;
    int nescape = 0;

    /*
     * Format the string using whichever format option first works.
     */
    ret = format_string_inner(fmt, 0, index, t);

    /*
     * Now re-_parse_ the string, to see if it gives the same index
     * back. It might not, if a pathname is valid in two formats: for
     * instance, if you use '-H -d max' to generate a static HTML dump
     * from scanning a directory which has a subdir called 'index',
     * you might well find that the top-level file wants to be called
     * index.html and so does the one for that subdir.
     *
     * We fix this by formatting the string again with more and more
     * characters escaped, so that the non-root 'index.html' becomes
     * (e.g.) '=69ndex.html', or '=69=6edex.html' if that doesn't
     * work, etc.
     *
     * The loop does not walk the format string itself: both helpers
     * are always handed the complete 'fmt'.
     */
    while (1) {
        /*
         * Parse the pathname and see if it gives the right index.
         */
        int parseret = parse_path(t, ret, fmt, &indexout);
        assert(parseret != 0);
        (void)parseret;                /* unused if assertions are off */
        if (indexout == index)
            break;                     /* path now parses successfully */

        /*
         * If not, try formatting it again.
         */
        char *retry = format_string_inner(fmt, ++nescape, index, t);
        assert(strcmp(retry, ret));    /* if nescape gets too big, give up */
        sfree(ret);
        ret = retry;
    }

    return ret;
}
729 | |
730 | char *html_format_path(const void *t, const struct html_config *cfg, |
731 | unsigned long index) |
732 | { |
733 | return format_string(cfg->uriformat, index, t); |
734 | } |
735 | |
736 | int html_parse_path(const void *t, const char *path, |
737 | const struct html_config *cfg, unsigned long *index) |
738 | { |
739 | return parse_path(t, path, cfg->uriformat, index); |
740 | } |
741 | |
c47f39de |
742 | char *make_href(const char *source, const char *target) |
743 | { |
744 | /* |
745 | * We insist that both source and target URIs start with a /, or |
746 | * else we won't be reliably able to construct relative hrefs |
747 | * between them (e.g. because we've got a suffix on the end of |
748 | * some CGI pathname that this function doesn't know the final |
749 | * component of). |
750 | */ |
751 | assert(*source == '/'); |
752 | assert(*target == '/'); |
753 | |
754 | /* |
755 | * Find the last / in source. Everything up to but not including |
756 | * that is the directory to which the output href will be |
757 | * relative. We enforce by assertion that there must be a / |
758 | * somewhere in source, or else we can't construct a relative href |
759 | * at all |
760 | */ |
761 | const char *sourceend = strrchr(source, '/'); |
762 | assert(sourceend != NULL); |
763 | |
764 | /* |
765 | * See how far the target URI agrees with the source one, up to |
766 | * and including that /. |
767 | */ |
768 | const char *s = source, *t = target; |
769 | while (s <= sourceend && *s == *t) |
770 | s++, t++; |
771 | |
772 | /* |
773 | * We're only interested in agreement of complete path components, |
774 | * so back off until we're sitting just after a shared /. |
775 | */ |
776 | while (s > source && s[-1] != '/') |
777 | s--, t--; |
778 | assert(s > source); |
779 | |
780 | /* |
781 | * Now we need some number of levels of "../" to get from source |
782 | * to here, and then we just replicate the rest of 'target'. |
783 | */ |
784 | int levels = 0; |
785 | while (s <= sourceend) { |
786 | if (*s == '/') |
787 | levels++; |
788 | s++; |
789 | } |
790 | int len = 3*levels + strlen(t); |
791 | if (len == 0) { |
792 | /* One last special case: if target has no tail _and_ we |
793 | * haven't written out any "../". */ |
794 | return dupstr("./"); |
795 | } else { |
796 | char *ret = snewn(len+1, char); |
797 | char *p = ret; |
798 | while (levels-- > 0) { |
799 | *p++ = '.'; |
800 | *p++ = '.'; |
801 | *p++ = '/'; |
802 | } |
803 | strcpy(p, t); |
804 | return ret; |
805 | } |
00c5e40c |
806 | } |
807 | |
70322ae3 |
808 | #define PIXEL_SIZE 600 /* FIXME: configurability? */ |
809 | static void write_report_line(struct html *ctx, struct vector *vec) |
810 | { |
742c1a74 |
811 | unsigned long long size, asize, divisor; |
afe761f3 |
812 | double display_size; |
70322ae3 |
813 | int pix, newpix; |
814 | int i; |
afe761f3 |
815 | const char *unitsfmt; |
70322ae3 |
816 | |
817 | /* |
010dd2a2 |
818 | * A line with literally zero space usage should not be |
819 | * printed at all if it's a link to a subdirectory (since it |
820 | * probably means the whole thing was excluded by some |
821 | * --exclude-path wildcard). If it's [files] or the top-level |
822 | * line, though, we must always print _something_, and in that |
823 | * case we must fiddle about to prevent divisions by zero in |
824 | * the code below. |
742c1a74 |
825 | */ |
16139d21 |
826 | if (!vec->sizes[MAXCOLOUR] && !vec->essential) |
010dd2a2 |
827 | return; |
742c1a74 |
828 | divisor = ctx->totalsize; |
010dd2a2 |
829 | if (!divisor) { |
742c1a74 |
830 | divisor = 1; |
010dd2a2 |
831 | } |
742c1a74 |
832 | |
833 | /* |
70322ae3 |
834 | * Find the total size of this subdirectory. |
835 | */ |
836 | size = vec->sizes[MAXCOLOUR]; |
afe761f3 |
837 | compute_display_size(size, &unitsfmt, &display_size); |
70322ae3 |
838 | htprintf(ctx, "<tr>\n" |
afe761f3 |
839 | "<td style=\"padding: 0.2em; text-align: right\">"); |
840 | htprintf(ctx, unitsfmt, display_size); |
841 | htprintf(ctx, "</td>\n"); |
70322ae3 |
842 | |
843 | /* |
844 | * Generate a colour bar. |
845 | */ |
846 | htprintf(ctx, "<td style=\"padding: 0.2em\">\n"); |
847 | begin_colour_bar(ctx); |
848 | pix = 0; |
849 | for (i = 0; i <= MAXCOLOUR; i++) { |
850 | asize = vec->sizes[i]; |
742c1a74 |
851 | newpix = asize * PIXEL_SIZE / divisor; |
70322ae3 |
852 | add_to_colour_bar(ctx, i, newpix - pix); |
853 | pix = newpix; |
854 | } |
855 | add_to_colour_bar(ctx, -1, PIXEL_SIZE - pix); |
856 | end_colour_bar(ctx); |
857 | htprintf(ctx, "</td>\n"); |
858 | |
859 | /* |
860 | * Output size as a percentage of totalsize. |
861 | */ |
862 | htprintf(ctx, "<td style=\"padding: 0.2em; text-align: right\">" |
742c1a74 |
863 | "%.2f%%</td>\n", (double)size / divisor * 100.0); |
70322ae3 |
864 | |
865 | /* |
866 | * Output a subdirectory marker. |
867 | */ |
868 | htprintf(ctx, "<td style=\"padding: 0.2em\">"); |
869 | if (vec->name) { |
870 | int doing_href = 0; |
871 | |
c47f39de |
872 | if (ctx->uriformat && vec->want_href) { |
873 | char *targeturi = format_string(ctx->uriformat, vec->index, |
874 | ctx->t); |
875 | char *link = make_href(ctx->oururi, targeturi); |
876 | htprintf(ctx, "<a href=\"%s\">", link); |
877 | sfree(link); |
878 | sfree(targeturi); |
70322ae3 |
879 | doing_href = 1; |
880 | } |
b49db535 |
881 | if (vec->literal) |
882 | htprintf(ctx, "<code>"); |
70322ae3 |
883 | htescape(ctx, vec->name, strlen(vec->name), 1); |
b49db535 |
884 | if (vec->literal) |
885 | htprintf(ctx, "</code>"); |
70322ae3 |
886 | if (doing_href) |
887 | htprintf(ctx, "</a>"); |
888 | } |
889 | htprintf(ctx, "</td>\n</tr>\n"); |
890 | } |
891 | |
0089cdbb |
892 | int strcmptrailingpathsep(const char *a, const char *b) |
893 | { |
894 | while (*a == *b && *a) |
895 | a++, b++; |
896 | |
897 | if ((*a == pathsep && !a[1] && !*b) || |
898 | (*b == pathsep && !b[1] && !*a)) |
899 | return 0; |
900 | |
901 | return (int)(unsigned char)*a - (int)(unsigned char)*b; |
902 | } |
903 | |
f2e52893 |
904 | char *html_query(const void *t, unsigned long index, |
00c5e40c |
905 | const struct html_config *cfg, int downlink) |
70322ae3 |
906 | { |
907 | struct html actx, *ctx = &actx; |
c47f39de |
908 | char *path, *path2, *p, *q; |
70322ae3 |
909 | char agebuf1[80], agebuf2[80]; |
c47f39de |
910 | size_t pathlen, subdirpos; |
70322ae3 |
911 | unsigned long index2; |
912 | int i; |
913 | struct vector **vecs; |
914 | int nvecs, vecsize; |
915 | unsigned long xi1, xi2, xj1, xj2; |
916 | |
917 | if (index >= trie_count(t)) |
918 | return NULL; |
919 | |
920 | ctx->buf = NULL; |
921 | ctx->buflen = ctx->bufsize = 0; |
922 | ctx->t = t; |
c47f39de |
923 | ctx->uriformat = cfg->uriformat; |
70322ae3 |
924 | htprintf(ctx, "<html>\n"); |
925 | |
926 | path = snewn(1+trie_maxpathlen(t), char); |
927 | ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char); |
4fc86a06 |
928 | if (cfg->uriformat) |
929 | ctx->oururi = format_string(cfg->uriformat, index, t); |
930 | else |
931 | ctx->oururi = NULL; |
70322ae3 |
932 | |
933 | /* |
934 | * HEAD section. |
935 | */ |
936 | htprintf(ctx, "<head>\n"); |
937 | trie_getpath(t, index, path); |
494ef23b |
938 | htprintf(ctx, "<title>"); |
939 | htescape(ctx, cfg->html_title, strlen(cfg->html_title), 0); |
940 | htprintf(ctx, ": "); |
70322ae3 |
941 | htescape(ctx, path, strlen(path), 0); |
942 | htprintf(ctx, "</title>\n"); |
943 | htprintf(ctx, "</head>\n"); |
944 | |
945 | /* |
946 | * Begin BODY section. |
947 | */ |
948 | htprintf(ctx, "<body>\n"); |
949 | htprintf(ctx, "<h3 align=center>Disk space breakdown by" |
950 | " last-access time</h3>\n"); |
951 | |
952 | /* |
953 | * Show the pathname we're centred on, with hyperlinks to |
954 | * parent directories where available. |
955 | */ |
956 | htprintf(ctx, "<p align=center>\n<code>"); |
957 | q = path; |
cfe942fb |
958 | for (p = strchr(path, pathsep); p && p[1]; p = strchr(p, pathsep)) { |
70322ae3 |
959 | int doing_href = 0; |
256c29a2 |
960 | char c, *zp; |
961 | |
70322ae3 |
962 | /* |
963 | * See if this path prefix exists in the trie. If so, |
964 | * generate a hyperlink. |
965 | */ |
256c29a2 |
966 | zp = p; |
967 | if (p == path) /* special case for "/" at start */ |
968 | zp++; |
969 | |
970 | p++; |
971 | |
972 | c = *zp; |
973 | *zp = '\0'; |
70322ae3 |
974 | index2 = trie_before(t, path); |
975 | trie_getpath(t, index2, path2); |
c47f39de |
976 | if (!strcmptrailingpathsep(path, path2) && cfg->uriformat) { |
977 | char *targeturi = format_string(cfg->uriformat, index2, t); |
978 | char *link = make_href(ctx->oururi, targeturi); |
979 | htprintf(ctx, "<a href=\"%s\">", link); |
980 | sfree(link); |
981 | sfree(targeturi); |
70322ae3 |
982 | doing_href = 1; |
983 | } |
256c29a2 |
984 | *zp = c; |
985 | htescape(ctx, q, zp - q, 1); |
70322ae3 |
986 | if (doing_href) |
987 | htprintf(ctx, "</a>"); |
256c29a2 |
988 | htescape(ctx, zp, p - zp, 1); |
989 | q = p; |
70322ae3 |
990 | } |
991 | htescape(ctx, q, strlen(q), 1); |
992 | htprintf(ctx, "</code>\n"); |
993 | |
994 | /* |
995 | * Decide on the age limit of our colour coding, establish the |
996 | * colour thresholds, and write out a key. |
997 | */ |
70322ae3 |
998 | ctx->now = time(NULL); |
f2e52893 |
999 | if (cfg->autoage) { |
1000 | ctx->oldest = index_order_stat(t, 0.05); |
1001 | ctx->newest = index_order_stat(t, 1.0); |
1002 | ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, -1); |
1003 | ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, +1); |
1004 | } else { |
1005 | ctx->oldest = cfg->oldest; |
1006 | ctx->newest = cfg->newest; |
1007 | ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, 0); |
1008 | ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, 0); |
1009 | } |
3f940260 |
1010 | for (i = 0; i < MAXCOLOUR; i++) { |
70322ae3 |
1011 | ctx->thresholds[i] = |
3f940260 |
1012 | ctx->oldest + (ctx->newest - ctx->oldest) * i / (MAXCOLOUR-1); |
1013 | } |
1014 | for (i = 0; i <= MAXCOLOUR; i++) { |
1015 | char buf[80]; |
1016 | |
1017 | if (i == 0) { |
6a4287ff |
1018 | strcpy(buf, "> "); |
3f940260 |
1019 | round_and_format_age(ctx, ctx->thresholds[0], buf+5, 0); |
1020 | } else if (i == MAXCOLOUR) { |
6a4287ff |
1021 | strcpy(buf, "< "); |
3f940260 |
1022 | round_and_format_age(ctx, ctx->thresholds[MAXCOLOUR-1], buf+5, 0); |
1023 | } else { |
1024 | unsigned long long midrange = |
1025 | (ctx->thresholds[i-1] + ctx->thresholds[i]) / 2; |
1026 | round_and_format_age(ctx, midrange, buf, 0); |
1027 | } |
1028 | |
1029 | ctx->titletexts[i] = dupstr(buf); |
70322ae3 |
1030 | } |
1031 | htprintf(ctx, "<p align=center>Key to colour coding (mouse over for more detail):\n"); |
1032 | htprintf(ctx, "<p align=center style=\"padding: 0; margin-top:0.4em; " |
c828a5bc |
1033 | "margin-bottom:1em\">"); |
70322ae3 |
1034 | begin_colour_bar(ctx); |
1035 | htprintf(ctx, "<td style=\"padding-right:1em\">%s</td>\n", agebuf1); |
1036 | for (i = 0; i < MAXCOLOUR; i++) |
1037 | add_to_colour_bar(ctx, i, 1); |
1038 | htprintf(ctx, "<td style=\"padding-left:1em\">%s</td>\n", agebuf2); |
1039 | end_colour_bar(ctx); |
1040 | |
1041 | /* |
1042 | * Begin the main table. |
1043 | */ |
1044 | htprintf(ctx, "<p align=center>\n<table style=\"margin:0; border:0\">\n"); |
1045 | |
1046 | /* |
1047 | * Find the total size of our entire subdirectory. We'll use |
1048 | * that as the scale for all the colour bars in this report. |
1049 | */ |
3f940260 |
1050 | get_indices(t, path, &xi1, &xi2); |
1051 | ctx->totalsize = fetch_size(t, xi1, xi2, ULLONG_MAX); |
70322ae3 |
1052 | |
1053 | /* |
1054 | * Generate a report line for the whole subdirectory. |
1055 | */ |
1056 | vecsize = 64; |
1057 | vecs = snewn(vecsize, struct vector *); |
1058 | nvecs = 1; |
16139d21 |
1059 | vecs[0] = make_vector(ctx, path, 0, 1, NULL, 0); |
70322ae3 |
1060 | print_heading(ctx, "Overall"); |
1061 | write_report_line(ctx, vecs[0]); |
1062 | |
1063 | /* |
1064 | * Now generate report lines for all its children, and the |
1065 | * files contained in it. |
1066 | */ |
1067 | print_heading(ctx, "Subdirectories"); |
1068 | |
1069 | vecs[0]->name = dupstr("[files]"); |
1070 | get_indices(t, path, &xi1, &xi2); |
1071 | xi1++; |
1072 | pathlen = strlen(path); |
256c29a2 |
1073 | subdirpos = pathlen + 1; |
1074 | if (pathlen > 0 && path[pathlen-1] == pathsep) |
1075 | subdirpos--; |
70322ae3 |
1076 | while (xi1 < xi2) { |
1077 | trie_getpath(t, xi1, path2); |
1078 | get_indices(t, ctx->path2, &xj1, &xj2); |
1079 | xi1 = xj2; |
16139d21 |
1080 | if (!cfg->showfiles && xj2 - xj1 <= 1) |
70322ae3 |
1081 | continue; /* skip individual files */ |
1082 | if (nvecs >= vecsize) { |
1083 | vecsize = nvecs * 3 / 2 + 64; |
1084 | vecs = sresize(vecs, vecsize, struct vector *); |
1085 | } |
1086 | assert(strlen(path2) > pathlen); |
00c5e40c |
1087 | vecs[nvecs] = make_vector(ctx, path2, downlink && (xj2 - xj1 > 1), 0, |
16139d21 |
1088 | path2 + subdirpos, 1); |
70322ae3 |
1089 | for (i = 0; i <= MAXCOLOUR; i++) |
1090 | vecs[0]->sizes[i] -= vecs[nvecs]->sizes[i]; |
1091 | nvecs++; |
1092 | } |
1093 | |
1094 | qsort(vecs, nvecs, sizeof(vecs[0]), vec_compare); |
1095 | |
1096 | for (i = 0; i < nvecs; i++) |
1097 | write_report_line(ctx, vecs[i]); |
1098 | |
1099 | /* |
1100 | * Close the main table. |
1101 | */ |
1102 | htprintf(ctx, "</table>\n"); |
1103 | |
1104 | /* |
1105 | * Finish up and tidy up. |
1106 | */ |
1107 | htprintf(ctx, "</body>\n"); |
1108 | htprintf(ctx, "</html>\n"); |
c47f39de |
1109 | sfree(ctx->oururi); |
70322ae3 |
1110 | sfree(path2); |
1111 | sfree(path); |
1112 | for (i = 0; i < nvecs; i++) { |
1113 | sfree(vecs[i]->name); |
1114 | sfree(vecs[i]); |
1115 | } |
1116 | sfree(vecs); |
1117 | |
1118 | return ctx->buf; |
1119 | } |
00c5e40c |
1120 | |
1121 | int html_dump(const void *t, unsigned long index, unsigned long endindex, |
1122 | int maxdepth, const struct html_config *cfg, |
1123 | const char *pathprefix) |
1124 | { |
1125 | /* |
1126 | * Determine the filename for this file. |
1127 | */ |
c47f39de |
1128 | assert(cfg->fileformat != NULL); |
1129 | char *filename = format_string(cfg->fileformat, index, t); |
1130 | char *path = dupfmt("%s%s", pathprefix, filename); |
1131 | sfree(filename); |
00c5e40c |
1132 | |
1133 | /* |
1134 | * Create the HTML itself. Don't write out downlinks from our |
1135 | * deepest level. |
1136 | */ |
1137 | char *html = html_query(t, index, cfg, maxdepth != 0); |
1138 | |
1139 | /* |
1140 | * Write it out. |
1141 | */ |
c47f39de |
1142 | FILE *fp = fopen(path, "w"); |
00c5e40c |
1143 | if (!fp) { |
c47f39de |
1144 | fprintf(stderr, "%s: %s: open: %s\n", PNAME, path, strerror(errno)); |
00c5e40c |
1145 | return 1; |
1146 | } |
1147 | if (fputs(html, fp) < 0) { |
c47f39de |
1148 | fprintf(stderr, "%s: %s: write: %s\n", PNAME, path, strerror(errno)); |
00c5e40c |
1149 | fclose(fp); |
1150 | return 1; |
1151 | } |
1152 | if (fclose(fp) < 0) { |
c47f39de |
1153 | fprintf(stderr, "%s: %s: fclose: %s\n", PNAME, path, strerror(errno)); |
00c5e40c |
1154 | return 1; |
1155 | } |
c47f39de |
1156 | sfree(path); |
00c5e40c |
1157 | |
1158 | /* |
1159 | * Recurse. |
1160 | */ |
1161 | if (maxdepth != 0) { |
1162 | unsigned long subindex, subendindex; |
1163 | int newdepth = (maxdepth > 0 ? maxdepth - 1 : maxdepth); |
c47f39de |
1164 | char rpath[1+trie_maxpathlen(t)]; |
00c5e40c |
1165 | |
1166 | index++; |
1167 | while (index < endindex) { |
c47f39de |
1168 | trie_getpath(t, index, rpath); |
1169 | get_indices(t, rpath, &subindex, &subendindex); |
00c5e40c |
1170 | index = subendindex; |
1171 | if (subendindex - subindex > 1) { |
1172 | if (html_dump(t, subindex, subendindex, newdepth, |
1173 | cfg, pathprefix)) |
1174 | return 1; |
1175 | } |
1176 | } |
1177 | } |
1178 | return 0; |
1179 | } |