Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / html.c
CommitLineData
70322ae3 1/*
2 * html.c: implementation of html.h.
3 */
4
353bc75d 5#include "agedu.h"
70322ae3 6#include "html.h"
995db599 7#include "alloc.h"
70322ae3 8#include "trie.h"
9#include "index.h"
10
/*
 * Index of the last colour band. Bands 0..MAXCOLOUR-1 each have an
 * age threshold; slot MAXCOLOUR is the catch-all "everything" band.
 */
#define MAXCOLOUR 511

/*
 * Working state shared by the routines that build one HTML page.
 */
struct html {
    /* Output text accumulated so far, and how much of it is in use. */
    char *buf;
    size_t buflen;
    size_t bufsize;
    /* The trie being reported on (opaque to this file). */
    const void *t;
    /* Scale for the colour bars, and the ends of the colour range. */
    unsigned long long totalsize;
    unsigned long long oldest;
    unsigned long long newest;
    /* Scratch pathname buffer. */
    char *path2;
    /* URI of the page being generated, or NULL if hrefs are off. */
    char *oururi;
    size_t hreflen;
    /* Format string used to build URIs for other pages. */
    const char *uriformat;
    /* Access-time boundary of each colour band. */
    unsigned long long thresholds[MAXCOLOUR];
    /* Tooltip text for each colour band, including the catch-all. */
    char *titletexts[MAXCOLOUR+1];
    /* Time at which the report is being generated. */
    time_t now;
};
26
afe761f3 27static void vhtprintf(struct html *ctx, const char *fmt, va_list ap)
70322ae3 28{
29 va_list ap2;
30 int size, size2;
50e82fdc 31 char testbuf[2];
70322ae3 32
33 va_copy(ap2, ap);
50e82fdc 34 /*
35 * Some C libraries (Solaris, I'm looking at you) don't like
36 * an output buffer size of zero in vsnprintf, but will return
37 * sensible values given any non-zero buffer size. Hence, we
38 * use testbuf to gauge the length of the string.
39 */
40 size = vsnprintf(testbuf, 1, fmt, ap2);
70322ae3 41 va_end(ap2);
42
43 if (ctx->buflen + size >= ctx->bufsize) {
44 ctx->bufsize = (ctx->buflen + size) * 3 / 2 + 1024;
45 ctx->buf = sresize(ctx->buf, ctx->bufsize, char);
46 }
47 size2 = vsnprintf(ctx->buf + ctx->buflen, ctx->bufsize - ctx->buflen,
48 fmt, ap);
49 assert(size == size2);
50 ctx->buflen += size;
51}
52
/*
 * Variadic front end to vhtprintf: append formatted text to the
 * output buffer in 'ctx'.
 */
static void htprintf(struct html *ctx, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vhtprintf(ctx, fmt, args);
    va_end(args);
}
60
61static unsigned long long round_and_format_age(struct html *ctx,
62 unsigned long long age,
63 char *buf, int direction)
64{
65 struct tm tm, tm2;
66 char newbuf[80];
67 unsigned long long ret, newret;
68 int i;
69 int ym;
70 static const int minutes[] = { 5, 10, 15, 30, 45 };
71
72 tm = *localtime(&ctx->now);
73 ym = tm.tm_year * 12 + tm.tm_mon;
74
75 ret = ctx->now;
76 strcpy(buf, "Now");
77
78 for (i = 0; i < lenof(minutes); i++) {
79 newret = ctx->now - minutes[i] * 60;
80 sprintf(newbuf, "%d minutes", minutes[i]);
81 if (newret < age)
82 goto finish;
83 strcpy(buf, newbuf);
84 ret = newret;
85 }
86
87 for (i = 1; i < 24; i++) {
88 newret = ctx->now - i * (60*60);
89 sprintf(newbuf, "%d hour%s", i, i==1 ? "" : "s");
90 if (newret < age)
91 goto finish;
92 strcpy(buf, newbuf);
93 ret = newret;
94 }
95
96 for (i = 1; i < 7; i++) {
97 newret = ctx->now - i * (24*60*60);
98 sprintf(newbuf, "%d day%s", i, i==1 ? "" : "s");
99 if (newret < age)
100 goto finish;
101 strcpy(buf, newbuf);
102 ret = newret;
103 }
104
105 for (i = 1; i < 4; i++) {
106 newret = ctx->now - i * (7*24*60*60);
107 sprintf(newbuf, "%d week%s", i, i==1 ? "" : "s");
108 if (newret < age)
109 goto finish;
110 strcpy(buf, newbuf);
111 ret = newret;
112 }
113
114 for (i = 1; i < 11; i++) {
115 tm2 = tm; /* structure copy */
116 tm2.tm_year = (ym - i) / 12;
117 tm2.tm_mon = (ym - i) % 12;
118 newret = mktime(&tm2);
119 sprintf(newbuf, "%d month%s", i, i==1 ? "" : "s");
120 if (newret < age)
121 goto finish;
122 strcpy(buf, newbuf);
123 ret = newret;
124 }
125
126 for (i = 1;; i++) {
127 tm2 = tm; /* structure copy */
128 tm2.tm_year = (ym - i*12) / 12;
129 tm2.tm_mon = (ym - i*12) % 12;
130 newret = mktime(&tm2);
131 sprintf(newbuf, "%d year%s", i, i==1 ? "" : "s");
132 if (newret < age)
133 goto finish;
134 strcpy(buf, newbuf);
135 ret = newret;
136 }
137
138 finish:
139 if (direction > 0) {
140 /*
141 * Round toward newest, i.e. use the existing (buf,ret).
142 */
143 } else if (direction < 0) {
144 /*
145 * Round toward oldest, i.e. use (newbuf,newret);
146 */
147 strcpy(buf, newbuf);
148 ret = newret;
149 } else {
150 /*
151 * Round to nearest.
152 */
153 if (ret - age > age - newret) {
154 strcpy(buf, newbuf);
155 ret = newret;
156 }
157 }
158 return ret;
159}
160
/*
 * Look up the pair of trie positions that bracket 'path': *xi1 is the
 * result of trie_before() for 'path' itself, and *xi2 the result for
 * the string make_successor() turns it into (presumably the first
 * string sorting after everything under 'path' -- confirm against
 * make_successor's definition).
 *
 * 'path' is modified temporarily but is restored before returning.
 */
static void get_indices(const void *t, char *path,
                        unsigned long *xi1, unsigned long *xi2)
{
    const size_t len = strlen(path);
    /* Remember the bytes make_successor may overwrite. */
    int saved_end = path[len];
    int saved_last = 0;

    if (len > 0)
        saved_last = path[len-1];

    *xi1 = trie_before(t, path);
    make_successor(path);
    *xi2 = trie_before(t, path);

    /* Put the caller's string back exactly as it was. */
    path[len] = saved_end;
    if (len > 0)
        path[len-1] = saved_last;
}
174
3f940260 175static unsigned long long fetch_size(const void *t,
176 unsigned long xi1, unsigned long xi2,
70322ae3 177 unsigned long long atime)
178{
16139d21 179 if (xi2 - xi1 == 1) {
180 /*
181 * We are querying an individual file, so we should not
182 * depend on the index entries either side of the node,
183 * since they almost certainly don't both exist. Instead,
184 * just look up the file's size and atime in the main trie.
185 */
186 const struct trie_file *f = trie_getfile(t, xi1);
187 if (f->atime < atime)
188 return f->size;
189 else
190 return 0;
191 } else {
192 return index_query(t, xi2, atime) - index_query(t, xi1, atime);
193 }
70322ae3 194}
195
/*
 * Append at most 'n' characters of 's' (stopping early at a NUL) to
 * the output, escaping them for HTML. '&', '<' and '>' become
 * entities; printable ASCII is copied; any other byte is shown as a
 * two-digit hex code in square brackets, wrapped in <i>...</i> when
 * 'italics' is nonzero.
 */
static void htescape(struct html *ctx, const char *s, int n, int italics)
{
    for (; n > 0 && *s; n--) {
        unsigned char c = (unsigned char)*s++;

        switch (c) {
          case '&':
            htprintf(ctx, "&amp;");
            break;
          case '<':
            htprintf(ctx, "&lt;");
            break;
          case '>':
            htprintf(ctx, "&gt;");
            break;
          default:
            if (c >= ' ' && c < '\177') {
                htprintf(ctx, "%c", c);
            } else {
                if (italics)
                    htprintf(ctx, "<i>");
                htprintf(ctx, "[%02x]", c);
                if (italics)
                    htprintf(ctx, "</i>");
            }
            break;
        }
    }
}
218
/*
 * Emit the opening markup of a colour bar: a borderless one-row
 * table whose cells are added by add_to_colour_bar.
 */
static void begin_colour_bar(struct html *ctx)
{
    htprintf(ctx,
             "<table cellspacing=0 cellpadding=0 style=\"border:0\">\n"
             "<tr>\n");
}
224
225static void add_to_colour_bar(struct html *ctx, int colour, int pixels)
226{
227 int r, g, b;
70322ae3 228
229 if (colour >= 0 && colour < 256) /* red -> yellow fade */
230 r = 255, g = colour, b = 0;
231 else if (colour >= 256 && colour <= 511) /* yellow -> green fade */
232 r = 511 - colour, g = 255, b = 0;
233 else /* background grey */
234 r = g = b = 240;
235
70322ae3 236 if (pixels > 0) {
237 htprintf(ctx, "<td style=\"width:%dpx; height:1em; "
238 "background-color:#%02x%02x%02x\"",
239 pixels, r, g, b);
240 if (colour >= 0)
3f940260 241 htprintf(ctx, " title=\"%s\"", ctx->titletexts[colour]);
70322ae3 242 htprintf(ctx, "></td>\n");
243 }
244}
245
/*
 * Emit the closing markup matching begin_colour_bar.
 */
static void end_colour_bar(struct html *ctx)
{
    htprintf(ctx, "</tr>\n"
             "</table>\n");
}
250
251struct vector {
16139d21 252 int want_href, essential;
70322ae3 253 char *name;
b49db535 254 int literal; /* should the name be formatted in fixed-pitch? */
70322ae3 255 unsigned long index;
256 unsigned long long sizes[MAXCOLOUR+1];
257};
258
259int vec_compare(const void *av, const void *bv)
260{
261 const struct vector *a = *(const struct vector **)av;
262 const struct vector *b = *(const struct vector **)bv;
263
264 if (a->sizes[MAXCOLOUR] > b->sizes[MAXCOLOUR])
265 return -1;
266 else if (a->sizes[MAXCOLOUR] < b->sizes[MAXCOLOUR])
267 return +1;
268 else if (a->want_href < b->want_href)
269 return +1;
270 else if (a->want_href > b->want_href)
271 return -1;
272 else if (a->want_href)
273 return strcmp(a->name, b->name);
274 else if (a->index < b->index)
275 return -1;
276 else if (a->index > b->index)
277 return +1;
16139d21 278 else if (a->essential < b->essential)
279 return +1;
280 else if (a->essential > b->essential)
281 return -1;
70322ae3 282 return 0;
283}
284
285static struct vector *make_vector(struct html *ctx, char *path,
16139d21 286 int want_href, int essential,
287 char *name, int literal)
70322ae3 288{
289 unsigned long xi1, xi2;
290 struct vector *vec = snew(struct vector);
291 int i;
292
293 vec->want_href = want_href;
16139d21 294 vec->essential = essential;
70322ae3 295 vec->name = name ? dupstr(name) : NULL;
b49db535 296 vec->literal = literal;
70322ae3 297
298 get_indices(ctx->t, path, &xi1, &xi2);
299
300 vec->index = xi1;
301
302 for (i = 0; i <= MAXCOLOUR; i++) {
303 unsigned long long atime;
304 if (i == MAXCOLOUR)
305 atime = ULLONG_MAX;
306 else
307 atime = ctx->thresholds[i];
3f940260 308 vec->sizes[i] = fetch_size(ctx->t, xi1, xi2, atime);
70322ae3 309 }
310
311 return vec;
312}
313
/*
 * Emit a full-width, centred, grey heading row containing 'title'
 * into the main report table. 'title' is inserted as-is, without
 * HTML escaping.
 */
static void print_heading(struct html *ctx, const char *title)
{
    htprintf(ctx,
             "<tr style=\"padding: 0.2em; background-color:#e0e0e0\">\n"
             "<td colspan=4 align=center>%s</td>\n"
             "</tr>\n",
             title);
}
319
/*
 * Choose a unit for displaying 'size' bytes.
 *
 * On return, *display_size is the size scaled down by the largest
 * power of 1024 that keeps its integer part non-zero (limited to the
 * biggest unit known here), and *fmt is a printf format taking that
 * double and appending the unit name.
 */
static void compute_display_size(unsigned long long size,
                                 const char **fmt, double *display_size)
{
    static const char *const fmts[] = {
        "%g b", "%g Kb", "%#.1f Mb", "%#.1f Gb", "%#.1f Tb",
        "%#.1f Pb", "%#.1f Eb", "%#.1f Zb", "%#.1f Yb"
    };
    unsigned long long remaining = size;
    double denominator = 1.0;
    int shift;

    /* Step up one unit at a time while there is still a unit left. */
    for (shift = 0;
         remaining >= 1024 && shift < sizeof(fmts)/sizeof(*fmts) - 1;
         shift++) {
        remaining >>= 10;
        denominator *= 1024.0;
    }

    *display_size = size / denominator;
    *fmt = fmts[shift];
}
341
/*
 * One alternative from a format string. Alternatives are separated by
 * "%|"; each consists of literal prefix text, at most one formatting
 * directive ("%n" or "%p", optionally with '/' and '-' flags between
 * the '%' and the letter), and literal suffix text. "%%" in the
 * literal text stands for a single '%'.
 */
struct format_option {
    const char *prefix, *suffix;       /* may include '%%' */
    int prefixlen, suffixlen;          /* counts each '%%' as one char */
    char fmttype;                      /* 0 for none, or 'n' or 'p' */
    int translate_pathsep;             /* pathsep rendered as '/'? */
    int shorten_path;                  /* omit common prefix? */
};

/*
 * Gets the next format option from a format string. Advances '*fmt'
 * past it, or sets it to NULL if nothing is left.
 *
 * '*fmt' must not be NULL on entry. Every field of the result is
 * always set: for an option with no formatting directive, fmttype is
 * 0 and the two flags hold their defaults (both 1).
 */
struct format_option get_format_option(const char **fmt)
{
    struct format_option ret;

    /*
     * Give the flags their default values up front, so that the
     * early returns below never hand back indeterminate fields.
     */
    ret.translate_pathsep = ret.shorten_path = 1;

    /*
     * Scan for prefix of format.
     */
    ret.prefix = *fmt;
    ret.prefixlen = 0;
    while (1) {
        if (**fmt == '\0') {
            /*
             * No formatting directive, and this is the last option.
             */
            ret.suffix = *fmt;
            ret.suffixlen = 0;
            ret.fmttype = '\0';
            *fmt = NULL;
            return ret;
        } else if (**fmt == '%') {
            if ((*fmt)[1] == '%') {
                (*fmt) += 2;           /* just advance one extra */
                ret.prefixlen++;
            } else if ((*fmt)[1] == '|') {
                /*
                 * No formatting directive.
                 */
                ret.suffix = *fmt;
                ret.suffixlen = 0;
                ret.fmttype = '\0';
                (*fmt) += 2;           /* advance to start of next option */
                return ret;
            } else {
                break;
            }
        } else {
            (*fmt)++;                  /* normal character */
            ret.prefixlen++;
        }
    }

    /*
     * Interpret formatting directive with flags.
     */
    (*fmt)++;
    while (1) {
        char c = *(*fmt)++;
        assert(c);
        if (c == '/') {
            ret.translate_pathsep = 0;
        } else if (c == '-') {
            ret.shorten_path = 0;
        } else {
            assert(c == 'n' || c == 'p');
            ret.fmttype = c;
            break;
        }
    }

    /*
     * Scan for suffix.
     */
    ret.suffix = *fmt;
    ret.suffixlen = 0;
    while (1) {
        if (**fmt == '\0') {
            /*
             * This is the last option.
             */
            *fmt = NULL;
            return ret;
        } else if (**fmt != '%') {
            (*fmt)++;                  /* normal character */
            ret.suffixlen++;
        } else {
            if ((*fmt)[1] == '%') {
                (*fmt) += 2;           /* just advance one extra */
                ret.suffixlen++;
            } else {
                assert((*fmt)[1] == '|');
                (*fmt) += 2;           /* advance to start of next option */
                return ret;
            }
        }
    }
}
441
c113ed52 442char *format_string_inner(const char *fmt, int nescape,
443 unsigned long index, const void *t)
00c5e40c 444{
c47f39de 445 int maxlen;
446 char *ret = NULL, *p = NULL;
447 char *path = NULL, *q = NULL;
448 char pathsep = trie_pathsep(t);
449 int maxpathlen = trie_maxpathlen(t);
c113ed52 450 int charindex;
c47f39de 451
452 while (fmt) {
453 struct format_option opt = get_format_option(&fmt);
454 if (index && !opt.fmttype)
455 continue; /* option is only good for the root, which this isn't */
456
457 maxlen = opt.prefixlen + opt.suffixlen + 1;
458 switch (opt.fmttype) {
459 case 'n':
460 maxlen += 40; /* generous length for an integer */
461 break;
462 case 'p':
463 maxlen += 3*maxpathlen; /* might have to escape everything */
464 break;
465 }
466 ret = snewn(maxlen, char);
467 p = ret;
468 while (opt.prefixlen-- > 0) {
469 if ((*p++ = *opt.prefix++) == '%')
470 opt.prefix++;
471 }
472 switch (opt.fmttype) {
473 case 'n':
474 p += sprintf(p, "%lu", index);
475 break;
476 case 'p':
477 path = snewn(1+trie_maxpathlen(t), char);
478 if (opt.shorten_path) {
479 trie_getpath(t, 0, path);
480 q = path + strlen(path);
481 trie_getpath(t, index, path);
482 if (*q == pathsep)
483 q++;
484 } else {
485 trie_getpath(t, index, path);
486 q = path;
487 }
c113ed52 488 charindex = 0;
c47f39de 489 while (*q) {
490 char c = *q++;
1d3a7ff6 491 if (c == pathsep && opt.translate_pathsep) {
c47f39de 492 *p++ = '/';
c113ed52 493 charindex = 0;
494 } else if (charindex < nescape ||
495 (!isalnum((unsigned char)c) &&
496 ((charindex == 0 && c=='.') ||
497 !strchr("-.@_", c)))) {
c47f39de 498 p += sprintf(p, "=%02X", (unsigned char)c);
c113ed52 499 charindex++;
1d3a7ff6 500 } else {
c47f39de 501 *p++ = c;
c113ed52 502 charindex++;
1d3a7ff6 503 }
c47f39de 504 }
505 sfree(path);
506 break;
507 }
508 while (opt.suffixlen-- > 0) {
509 if ((*p++ = *opt.suffix++) == '%')
510 opt.suffix++;
511 }
512 *p = '\0';
513 assert(p - ret < maxlen);
514 return ret;
515 }
516 assert(!"Getting here implies an incomplete set of formats");
517}
518
c113ed52 519int parse_path(const void *t, const char *path,
520 const char *fmt, unsigned long *index)
c47f39de 521{
522 int len = strlen(path);
523 int midlen;
524 const char *p, *q;
525 char *r;
526 char pathsep = trie_pathsep(t);
c47f39de 527
528 while (fmt) {
529 struct format_option opt = get_format_option(&fmt);
530
531 /*
532 * Check prefix and suffix.
533 */
534 midlen = len - opt.prefixlen - opt.suffixlen;
535 if (midlen < 0)
536 continue; /* prefix and suffix don't even fit */
537
538 p = path;
539 while (opt.prefixlen > 0) {
540 char c = *opt.prefix++;
541 if (c == '%')
542 opt.prefix++;
543 if (*p != c)
544 break;
545 p++;
546 opt.prefixlen--;
547 }
548 if (opt.prefixlen > 0)
549 continue; /* prefix didn't match */
550
551 q = path + len - opt.suffixlen;
552 while (opt.suffixlen > 0) {
553 char c = *opt.suffix++;
554 if (c == '%')
555 opt.suffix++;
556 if (*q != c)
557 break;
558 q++;
559 opt.suffixlen--;
560 }
561 if (opt.suffixlen > 0)
562 continue; /* suffix didn't match */
563
564 /*
565 * Check the data in between. p points at it, and it's midlen
566 * characters long.
567 */
568 if (opt.fmttype == '\0') {
569 if (midlen == 0) {
570 /*
571 * Successful match against a root format.
572 */
573 *index = 0;
574 return 1;
575 }
576 } else if (opt.fmttype == 'n') {
577 *index = 0;
578 while (midlen > 0) {
579 if (*p >= '0' && *p <= '9')
580 *index = *index * 10 + (*p - '0');
581 else
582 break;
583 midlen--;
584 p++;
585 }
586 if (midlen == 0) {
587 /*
588 * Successful match against a numeric format.
589 */
590 return 1;
591 }
592 } else {
593 assert(opt.fmttype == 'p');
594
595 int maxoutlen = trie_maxpathlen(t) + 1;
596 int maxinlen = midlen + 1;
597 char triepath[maxinlen+maxoutlen];
598
599 if (opt.shorten_path) {
600 trie_getpath(t, 0, triepath);
601 r = triepath + strlen(triepath);
602 if (r > triepath && r[-1] != pathsep)
603 *r++ = pathsep;
604 } else {
605 r = triepath;
606 }
607
608 while (midlen > 0) {
609 if (*p == '/' && opt.translate_pathsep) {
610 *r++ = pathsep;
611 p++;
612 midlen--;
613 } else if (*p == '=') {
1d3a7ff6 614 /*
615 * We intentionally do not check whether the
616 * escaped character _should_ have been escaped
617 * according to the rules in html_format_path.
618 *
619 * All clients of this parsing function, after a
620 * successful parse, call html_format_path to find
621 * the canonical URI for the same index and return
622 * an HTTP redirect if the provided URI was not
623 * exactly equal to that canonical form. This is
624 * critical when the correction involves adding or
625 * removing a trailing slash (because then
626 * relative hrefs on the generated page can be
627 * computed with respect to the canonical URI
628 * instead of having to remember what the actual
629 * URI was), but also has the useful effect that
630 * if a user attempts to type in (guess) a URI by
631 * hand they don't have to remember the escaping
632 * rules - as long as they type _something_ that
633 * this code can parse into a recognisable
634 * pathname, it will be automatically 301ed into
635 * the canonical form.
636 */
c47f39de 637 if (midlen < 3 ||
638 !isxdigit((unsigned char)p[1]) ||
639 !isxdigit((unsigned char)p[2]))
640 break; /* faulty escape encoding */
641 char x[3];
642 unsigned cval;
643 x[0] = p[1];
644 x[1] = p[2];
645 x[2] = '\0';
646 sscanf(x, "%x", &cval);
647 *r++ = cval;
648 p += 3;
649 midlen -= 3;
650 } else {
651 *r++ = *p;
652 p++;
653 midlen--;
654 }
655 }
656 if (midlen > 0)
657 continue; /* something went wrong in that loop */
658 assert(r - triepath < maxinlen+maxoutlen);
659 *r = '\0';
660
661 unsigned long gotidx = trie_before(t, triepath);
662 if (gotidx >= trie_count(t))
663 continue; /* index out of range */
664 char retpath[1+maxoutlen];
665 trie_getpath(t, gotidx, retpath);
666 if (strcmp(triepath, retpath))
667 continue; /* exact path not found in trie */
668 if (!index_has_root(t, gotidx))
669 continue; /* path is not a directory */
670
671 /*
672 * Successful path-based match.
673 */
674 *index = gotidx;
675 return 1;
676 }
677 }
678
679 return 0; /* no match from any format option */
680}
681
/*
 * Format trie position 'index' according to 'fmt', making sure that
 * parsing the result with the same format yields 'index' again.
 *
 * Returns a newly allocated string which the caller must free.
 */
char *format_string(const char *fmt, unsigned long index, const void *t)
{
    unsigned long indexout;
    char *ret;
    int nescape = 0;

    /*
     * Format the string using whichever format option first works.
     */
    ret = format_string_inner(fmt, 0, index, t);

    /*
     * Now re-_parse_ the string, to see if it gives the same index
     * back. It might not, if a pathname is valid in two formats: for
     * instance, if you use '-H -d max' to generate a static HTML dump
     * from scanning a directory which has a subdir called 'index',
     * you might well find that the top-level file wants to be called
     * index.html and so does the one for that subdir.
     *
     * We fix this by formatting the string again with more and more
     * characters escaped, so that the non-root 'index.html' becomes
     * (e.g.) '=69ndex.html', or '=69=6edex.html' if that doesn't
     * work, etc.
     *
     * (Nothing in this loop steps through the individual format
     * options: parse_path and format_string_inner both take the
     * whole of 'fmt' every time.)
     */
    while (1) {
        /*
         * Parse the pathname and see if it gives the right index.
         */
        int parseret = parse_path(t, ret, fmt, &indexout);
        assert(parseret != 0);
        (void)parseret;                /* unused if assertions are off */
        if (indexout == index)
            break;                     /* path now parses successfully */

        /*
         * If not, try formatting it again.
         */
        char *new = format_string_inner(fmt, ++nescape, index, t);
        assert(strcmp(new, ret));      /* if nescape gets too big, give up */
        sfree(ret);
        ret = new;
    }

    return ret;
}
729
730char *html_format_path(const void *t, const struct html_config *cfg,
731 unsigned long index)
732{
733 return format_string(cfg->uriformat, index, t);
734}
735
736int html_parse_path(const void *t, const char *path,
737 const struct html_config *cfg, unsigned long *index)
738{
739 return parse_path(t, path, cfg->uriformat, index);
740}
741
c47f39de 742char *make_href(const char *source, const char *target)
743{
744 /*
745 * We insist that both source and target URIs start with a /, or
746 * else we won't be reliably able to construct relative hrefs
747 * between them (e.g. because we've got a suffix on the end of
748 * some CGI pathname that this function doesn't know the final
749 * component of).
750 */
751 assert(*source == '/');
752 assert(*target == '/');
753
754 /*
755 * Find the last / in source. Everything up to but not including
756 * that is the directory to which the output href will be
757 * relative. We enforce by assertion that there must be a /
758 * somewhere in source, or else we can't construct a relative href
759 * at all
760 */
761 const char *sourceend = strrchr(source, '/');
762 assert(sourceend != NULL);
763
764 /*
765 * See how far the target URI agrees with the source one, up to
766 * and including that /.
767 */
768 const char *s = source, *t = target;
769 while (s <= sourceend && *s == *t)
770 s++, t++;
771
772 /*
773 * We're only interested in agreement of complete path components,
774 * so back off until we're sitting just after a shared /.
775 */
776 while (s > source && s[-1] != '/')
777 s--, t--;
778 assert(s > source);
779
780 /*
781 * Now we need some number of levels of "../" to get from source
782 * to here, and then we just replicate the rest of 'target'.
783 */
784 int levels = 0;
785 while (s <= sourceend) {
786 if (*s == '/')
787 levels++;
788 s++;
789 }
790 int len = 3*levels + strlen(t);
791 if (len == 0) {
792 /* One last special case: if target has no tail _and_ we
793 * haven't written out any "../". */
794 return dupstr("./");
795 } else {
796 char *ret = snewn(len+1, char);
797 char *p = ret;
798 while (levels-- > 0) {
799 *p++ = '.';
800 *p++ = '.';
801 *p++ = '/';
802 }
803 strcpy(p, t);
804 return ret;
805 }
00c5e40c 806}
807
70322ae3 808#define PIXEL_SIZE 600 /* FIXME: configurability? */
809static void write_report_line(struct html *ctx, struct vector *vec)
810{
742c1a74 811 unsigned long long size, asize, divisor;
afe761f3 812 double display_size;
70322ae3 813 int pix, newpix;
814 int i;
afe761f3 815 const char *unitsfmt;
70322ae3 816
817 /*
010dd2a2 818 * A line with literally zero space usage should not be
819 * printed at all if it's a link to a subdirectory (since it
820 * probably means the whole thing was excluded by some
821 * --exclude-path wildcard). If it's [files] or the top-level
822 * line, though, we must always print _something_, and in that
823 * case we must fiddle about to prevent divisions by zero in
824 * the code below.
742c1a74 825 */
16139d21 826 if (!vec->sizes[MAXCOLOUR] && !vec->essential)
010dd2a2 827 return;
742c1a74 828 divisor = ctx->totalsize;
010dd2a2 829 if (!divisor) {
742c1a74 830 divisor = 1;
010dd2a2 831 }
742c1a74 832
833 /*
70322ae3 834 * Find the total size of this subdirectory.
835 */
836 size = vec->sizes[MAXCOLOUR];
afe761f3 837 compute_display_size(size, &unitsfmt, &display_size);
70322ae3 838 htprintf(ctx, "<tr>\n"
afe761f3 839 "<td style=\"padding: 0.2em; text-align: right\">");
840 htprintf(ctx, unitsfmt, display_size);
841 htprintf(ctx, "</td>\n");
70322ae3 842
843 /*
844 * Generate a colour bar.
845 */
846 htprintf(ctx, "<td style=\"padding: 0.2em\">\n");
847 begin_colour_bar(ctx);
848 pix = 0;
849 for (i = 0; i <= MAXCOLOUR; i++) {
850 asize = vec->sizes[i];
742c1a74 851 newpix = asize * PIXEL_SIZE / divisor;
70322ae3 852 add_to_colour_bar(ctx, i, newpix - pix);
853 pix = newpix;
854 }
855 add_to_colour_bar(ctx, -1, PIXEL_SIZE - pix);
856 end_colour_bar(ctx);
857 htprintf(ctx, "</td>\n");
858
859 /*
860 * Output size as a percentage of totalsize.
861 */
862 htprintf(ctx, "<td style=\"padding: 0.2em; text-align: right\">"
742c1a74 863 "%.2f%%</td>\n", (double)size / divisor * 100.0);
70322ae3 864
865 /*
866 * Output a subdirectory marker.
867 */
868 htprintf(ctx, "<td style=\"padding: 0.2em\">");
869 if (vec->name) {
870 int doing_href = 0;
871
c47f39de 872 if (ctx->uriformat && vec->want_href) {
873 char *targeturi = format_string(ctx->uriformat, vec->index,
874 ctx->t);
875 char *link = make_href(ctx->oururi, targeturi);
876 htprintf(ctx, "<a href=\"%s\">", link);
877 sfree(link);
878 sfree(targeturi);
70322ae3 879 doing_href = 1;
880 }
b49db535 881 if (vec->literal)
882 htprintf(ctx, "<code>");
70322ae3 883 htescape(ctx, vec->name, strlen(vec->name), 1);
b49db535 884 if (vec->literal)
885 htprintf(ctx, "</code>");
70322ae3 886 if (doing_href)
887 htprintf(ctx, "</a>");
888 }
889 htprintf(ctx, "</td>\n</tr>\n");
890}
891
0089cdbb 892int strcmptrailingpathsep(const char *a, const char *b)
893{
894 while (*a == *b && *a)
895 a++, b++;
896
897 if ((*a == pathsep && !a[1] && !*b) ||
898 (*b == pathsep && !b[1] && !*a))
899 return 0;
900
901 return (int)(unsigned char)*a - (int)(unsigned char)*b;
902}
903
f2e52893 904char *html_query(const void *t, unsigned long index,
00c5e40c 905 const struct html_config *cfg, int downlink)
70322ae3 906{
907 struct html actx, *ctx = &actx;
c47f39de 908 char *path, *path2, *p, *q;
70322ae3 909 char agebuf1[80], agebuf2[80];
c47f39de 910 size_t pathlen, subdirpos;
70322ae3 911 unsigned long index2;
912 int i;
913 struct vector **vecs;
914 int nvecs, vecsize;
915 unsigned long xi1, xi2, xj1, xj2;
916
917 if (index >= trie_count(t))
918 return NULL;
919
920 ctx->buf = NULL;
921 ctx->buflen = ctx->bufsize = 0;
922 ctx->t = t;
c47f39de 923 ctx->uriformat = cfg->uriformat;
70322ae3 924 htprintf(ctx, "<html>\n");
925
926 path = snewn(1+trie_maxpathlen(t), char);
927 ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char);
4fc86a06 928 if (cfg->uriformat)
929 ctx->oururi = format_string(cfg->uriformat, index, t);
930 else
931 ctx->oururi = NULL;
70322ae3 932
933 /*
934 * HEAD section.
935 */
936 htprintf(ctx, "<head>\n");
937 trie_getpath(t, index, path);
494ef23b 938 htprintf(ctx, "<title>");
939 htescape(ctx, cfg->html_title, strlen(cfg->html_title), 0);
940 htprintf(ctx, ": ");
70322ae3 941 htescape(ctx, path, strlen(path), 0);
942 htprintf(ctx, "</title>\n");
943 htprintf(ctx, "</head>\n");
944
945 /*
946 * Begin BODY section.
947 */
948 htprintf(ctx, "<body>\n");
949 htprintf(ctx, "<h3 align=center>Disk space breakdown by"
950 " last-access time</h3>\n");
951
952 /*
953 * Show the pathname we're centred on, with hyperlinks to
954 * parent directories where available.
955 */
956 htprintf(ctx, "<p align=center>\n<code>");
957 q = path;
cfe942fb 958 for (p = strchr(path, pathsep); p && p[1]; p = strchr(p, pathsep)) {
70322ae3 959 int doing_href = 0;
256c29a2 960 char c, *zp;
961
70322ae3 962 /*
963 * See if this path prefix exists in the trie. If so,
964 * generate a hyperlink.
965 */
256c29a2 966 zp = p;
967 if (p == path) /* special case for "/" at start */
968 zp++;
969
970 p++;
971
972 c = *zp;
973 *zp = '\0';
70322ae3 974 index2 = trie_before(t, path);
975 trie_getpath(t, index2, path2);
c47f39de 976 if (!strcmptrailingpathsep(path, path2) && cfg->uriformat) {
977 char *targeturi = format_string(cfg->uriformat, index2, t);
978 char *link = make_href(ctx->oururi, targeturi);
979 htprintf(ctx, "<a href=\"%s\">", link);
980 sfree(link);
981 sfree(targeturi);
70322ae3 982 doing_href = 1;
983 }
256c29a2 984 *zp = c;
985 htescape(ctx, q, zp - q, 1);
70322ae3 986 if (doing_href)
987 htprintf(ctx, "</a>");
256c29a2 988 htescape(ctx, zp, p - zp, 1);
989 q = p;
70322ae3 990 }
991 htescape(ctx, q, strlen(q), 1);
992 htprintf(ctx, "</code>\n");
993
994 /*
995 * Decide on the age limit of our colour coding, establish the
996 * colour thresholds, and write out a key.
997 */
70322ae3 998 ctx->now = time(NULL);
f2e52893 999 if (cfg->autoage) {
1000 ctx->oldest = index_order_stat(t, 0.05);
1001 ctx->newest = index_order_stat(t, 1.0);
1002 ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, -1);
1003 ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, +1);
1004 } else {
1005 ctx->oldest = cfg->oldest;
1006 ctx->newest = cfg->newest;
1007 ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, 0);
1008 ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, 0);
1009 }
3f940260 1010 for (i = 0; i < MAXCOLOUR; i++) {
70322ae3 1011 ctx->thresholds[i] =
3f940260 1012 ctx->oldest + (ctx->newest - ctx->oldest) * i / (MAXCOLOUR-1);
1013 }
1014 for (i = 0; i <= MAXCOLOUR; i++) {
1015 char buf[80];
1016
1017 if (i == 0) {
6a4287ff 1018 strcpy(buf, "&gt; ");
3f940260 1019 round_and_format_age(ctx, ctx->thresholds[0], buf+5, 0);
1020 } else if (i == MAXCOLOUR) {
6a4287ff 1021 strcpy(buf, "&lt; ");
3f940260 1022 round_and_format_age(ctx, ctx->thresholds[MAXCOLOUR-1], buf+5, 0);
1023 } else {
1024 unsigned long long midrange =
1025 (ctx->thresholds[i-1] + ctx->thresholds[i]) / 2;
1026 round_and_format_age(ctx, midrange, buf, 0);
1027 }
1028
1029 ctx->titletexts[i] = dupstr(buf);
70322ae3 1030 }
1031 htprintf(ctx, "<p align=center>Key to colour coding (mouse over for more detail):\n");
1032 htprintf(ctx, "<p align=center style=\"padding: 0; margin-top:0.4em; "
c828a5bc 1033 "margin-bottom:1em\">");
70322ae3 1034 begin_colour_bar(ctx);
1035 htprintf(ctx, "<td style=\"padding-right:1em\">%s</td>\n", agebuf1);
1036 for (i = 0; i < MAXCOLOUR; i++)
1037 add_to_colour_bar(ctx, i, 1);
1038 htprintf(ctx, "<td style=\"padding-left:1em\">%s</td>\n", agebuf2);
1039 end_colour_bar(ctx);
1040
1041 /*
1042 * Begin the main table.
1043 */
1044 htprintf(ctx, "<p align=center>\n<table style=\"margin:0; border:0\">\n");
1045
1046 /*
1047 * Find the total size of our entire subdirectory. We'll use
1048 * that as the scale for all the colour bars in this report.
1049 */
3f940260 1050 get_indices(t, path, &xi1, &xi2);
1051 ctx->totalsize = fetch_size(t, xi1, xi2, ULLONG_MAX);
70322ae3 1052
1053 /*
1054 * Generate a report line for the whole subdirectory.
1055 */
1056 vecsize = 64;
1057 vecs = snewn(vecsize, struct vector *);
1058 nvecs = 1;
16139d21 1059 vecs[0] = make_vector(ctx, path, 0, 1, NULL, 0);
70322ae3 1060 print_heading(ctx, "Overall");
1061 write_report_line(ctx, vecs[0]);
1062
1063 /*
1064 * Now generate report lines for all its children, and the
1065 * files contained in it.
1066 */
1067 print_heading(ctx, "Subdirectories");
1068
1069 vecs[0]->name = dupstr("[files]");
1070 get_indices(t, path, &xi1, &xi2);
1071 xi1++;
1072 pathlen = strlen(path);
256c29a2 1073 subdirpos = pathlen + 1;
1074 if (pathlen > 0 && path[pathlen-1] == pathsep)
1075 subdirpos--;
70322ae3 1076 while (xi1 < xi2) {
1077 trie_getpath(t, xi1, path2);
1078 get_indices(t, ctx->path2, &xj1, &xj2);
1079 xi1 = xj2;
16139d21 1080 if (!cfg->showfiles && xj2 - xj1 <= 1)
70322ae3 1081 continue; /* skip individual files */
1082 if (nvecs >= vecsize) {
1083 vecsize = nvecs * 3 / 2 + 64;
1084 vecs = sresize(vecs, vecsize, struct vector *);
1085 }
1086 assert(strlen(path2) > pathlen);
00c5e40c 1087 vecs[nvecs] = make_vector(ctx, path2, downlink && (xj2 - xj1 > 1), 0,
16139d21 1088 path2 + subdirpos, 1);
70322ae3 1089 for (i = 0; i <= MAXCOLOUR; i++)
1090 vecs[0]->sizes[i] -= vecs[nvecs]->sizes[i];
1091 nvecs++;
1092 }
1093
1094 qsort(vecs, nvecs, sizeof(vecs[0]), vec_compare);
1095
1096 for (i = 0; i < nvecs; i++)
1097 write_report_line(ctx, vecs[i]);
1098
1099 /*
1100 * Close the main table.
1101 */
1102 htprintf(ctx, "</table>\n");
1103
1104 /*
1105 * Finish up and tidy up.
1106 */
1107 htprintf(ctx, "</body>\n");
1108 htprintf(ctx, "</html>\n");
c47f39de 1109 sfree(ctx->oururi);
70322ae3 1110 sfree(path2);
1111 sfree(path);
1112 for (i = 0; i < nvecs; i++) {
1113 sfree(vecs[i]->name);
1114 sfree(vecs[i]);
1115 }
1116 sfree(vecs);
1117
1118 return ctx->buf;
1119}
00c5e40c 1120
1121int html_dump(const void *t, unsigned long index, unsigned long endindex,
1122 int maxdepth, const struct html_config *cfg,
1123 const char *pathprefix)
1124{
1125 /*
1126 * Determine the filename for this file.
1127 */
c47f39de 1128 assert(cfg->fileformat != NULL);
1129 char *filename = format_string(cfg->fileformat, index, t);
1130 char *path = dupfmt("%s%s", pathprefix, filename);
1131 sfree(filename);
00c5e40c 1132
1133 /*
1134 * Create the HTML itself. Don't write out downlinks from our
1135 * deepest level.
1136 */
1137 char *html = html_query(t, index, cfg, maxdepth != 0);
1138
1139 /*
1140 * Write it out.
1141 */
c47f39de 1142 FILE *fp = fopen(path, "w");
00c5e40c 1143 if (!fp) {
c47f39de 1144 fprintf(stderr, "%s: %s: open: %s\n", PNAME, path, strerror(errno));
00c5e40c 1145 return 1;
1146 }
1147 if (fputs(html, fp) < 0) {
c47f39de 1148 fprintf(stderr, "%s: %s: write: %s\n", PNAME, path, strerror(errno));
00c5e40c 1149 fclose(fp);
1150 return 1;
1151 }
1152 if (fclose(fp) < 0) {
c47f39de 1153 fprintf(stderr, "%s: %s: fclose: %s\n", PNAME, path, strerror(errno));
00c5e40c 1154 return 1;
1155 }
c47f39de 1156 sfree(path);
00c5e40c 1157
1158 /*
1159 * Recurse.
1160 */
1161 if (maxdepth != 0) {
1162 unsigned long subindex, subendindex;
1163 int newdepth = (maxdepth > 0 ? maxdepth - 1 : maxdepth);
c47f39de 1164 char rpath[1+trie_maxpathlen(t)];
00c5e40c 1165
1166 index++;
1167 while (index < endindex) {
c47f39de 1168 trie_getpath(t, index, rpath);
1169 get_indices(t, rpath, &subindex, &subendindex);
00c5e40c 1170 index = subendindex;
1171 if (subendindex - subindex > 1) {
1172 if (html_dump(t, subindex, subendindex, newdepth,
1173 cfg, pathprefix))
1174 return 1;
1175 }
1176 }
1177 }
1178 return 0;
1179}