}
}
-char *format_string(const char *fmt, unsigned long index, const void *t)
+char *format_string_inner(const char *fmt, int nescape,
+ unsigned long index, const void *t)
{
int maxlen;
char *ret = NULL, *p = NULL;
char *path = NULL, *q = NULL;
char pathsep = trie_pathsep(t);
int maxpathlen = trie_maxpathlen(t);
+ int charindex;
while (fmt) {
struct format_option opt = get_format_option(&fmt);
trie_getpath(t, index, path);
q = path;
}
+ charindex = 0;
while (*q) {
char c = *q++;
- if (c == pathsep && opt.translate_pathsep)
+ if (c == pathsep && opt.translate_pathsep) {
*p++ = '/';
- else if (!isalnum((unsigned char)c) && !strchr("-.@_", c))
+ charindex = 0;
+ } else if (charindex < nescape ||
+ (!isalnum((unsigned char)c) &&
+ ((charindex == 0 && c=='.') ||
+ !strchr("-.@_", c)))) {
p += sprintf(p, "=%02X", (unsigned char)c);
- else
+ charindex++;
+ } else {
*p++ = c;
+ charindex++;
+ }
}
sfree(path);
break;
assert(!"Getting here implies an incomplete set of formats");
}
-char *html_format_path(const void *t, const struct html_config *cfg,
- unsigned long index)
-{
- return format_string(cfg->uriformat, index, t);
-}
-
-int html_parse_path(const void *t, const char *path,
- const struct html_config *cfg, unsigned long *index)
+int parse_path(const void *t, const char *path,
+ const char *fmt, unsigned long *index)
{
int len = strlen(path);
int midlen;
const char *p, *q;
char *r;
char pathsep = trie_pathsep(t);
- const char *fmt = cfg->uriformat;
while (fmt) {
struct format_option opt = get_format_option(&fmt);
p++;
midlen--;
} else if (*p == '=') {
+ /*
+ * We intentionally do not check whether the
+ * escaped character _should_ have been escaped
+ * according to the rules in html_format_path.
+ *
+ * All clients of this parsing function, after a
+ * successful parse, call html_format_path to find
+ * the canonical URI for the same index and return
+ * an HTTP redirect if the provided URI was not
+ * exactly equal to that canonical form. This is
+ * critical when the correction involves adding or
+ * removing a trailing slash (because then
+ * relative hrefs on the generated page can be
+ * computed with respect to the canonical URI
+ * instead of having to remember what the actual
+ * URI was), but also has the useful effect that
+ * if a user attempts to type in (guess) a URI by
+ * hand they don't have to remember the escaping
+ * rules - as long as they type _something_ that
+ * this code can parse into a recognisable
+ * pathname, it will be automatically 301ed into
+ * the canonical form.
+ */
if (midlen < 3 ||
!isxdigit((unsigned char)p[1]) ||
!isxdigit((unsigned char)p[2]))
return 0; /* no match from any format option */
}
+/*
+ * Format the pathname for trie entry `index` according to `fmt`, and
+ * make sure the result is unambiguous: it must parse back (through
+ * parse_path with the same `fmt`) to `index` itself.
+ *
+ * fmt:   format specification, passed unchanged to both
+ *        format_string_inner and parse_path.
+ * index: trie index of the pathname being formatted.
+ * t:     trie handle, passed through to the helpers.
+ *
+ * Returns the string produced by format_string_inner. Superseded
+ * attempts are released here with sfree; presumably the caller
+ * releases the final one the same way (TODO confirm against callers).
+ */
+char *format_string(const char *fmt, unsigned long index, const void *t)
+{
+    unsigned long indexout;
+    char *ret;
+    int nescape = 0;
+
+    /*
+     * Format the string using whichever format option first works.
+     */
+    ret = format_string_inner(fmt, 0, index, t);
+
+    /*
+     * Now re-_parse_ the string, to see if it gives the same index
+     * back. It might not, if a pathname is valid in two formats: for
+     * instance, if you use '-H -d max' to generate a static HTML dump
+     * from scanning a directory which has a subdir called 'index',
+     * you might well find that the top-level file wants to be called
+     * index.html and so does the one for that subdir.
+     *
+     * We fix this by formatting the string again with more and more
+     * characters escaped, so that the non-root 'index.html' becomes
+     * (e.g.) '=69ndex.html', or '=69=6Edex.html' if that doesn't
+     * work, etc.
+     *
+     * This loop does not step through the format options itself:
+     * parse_path and format_string_inner each walk `fmt` from the
+     * start on every call, so there is nothing to advance here.
+     */
+    while (1) {
+        /*
+         * Parse the pathname and see if it gives the right index.
+         */
+        int parseret = parse_path(t, ret, fmt, &indexout);
+        assert(parseret != 0);
+        (void)parseret;            /* only inspected by the assert */
+        if (indexout == index)
+            break;                 /* path now parses successfully */
+
+        /*
+         * If not, try formatting it again with one more leading
+         * character escaped.
+         */
+        char *retry = format_string_inner(fmt, ++nescape, index, t);
+        assert(strcmp(retry, ret)); /* if nescape gets too big, give up */
+        sfree(ret);
+        ret = retry;
+    }
+
+    return ret;
+}
+
+/*
+ * Build the URI for trie entry `index` using the URI format stored
+ * in `cfg`. This is a thin wrapper: all the work, including the
+ * choice of escaping, is done by format_string.
+ */
+char *html_format_path(const void *t, const struct html_config *cfg,
+                       unsigned long index)
+{
+    const char *uriformat = cfg->uriformat;
+
+    return format_string(uriformat, index, t);
+}
+
+/*
+ * Parse `path` against the URI format stored in `cfg`, handing the
+ * `index` out-parameter straight to parse_path. The return value is
+ * parse_path's own, passed back unchanged (parse_path returns 0 when
+ * no format option matches).
+ */
+int html_parse_path(const void *t, const char *path,
+                    const struct html_config *cfg, unsigned long *index)
+{
+    const char *uriformat = cfg->uriformat;
+    int matched = parse_path(t, path, uriformat, index);
+
+    return matched;
+}
+
char *make_href(const char *source, const char *target)
{
/*
path = snewn(1+trie_maxpathlen(t), char);
ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char);
- ctx->oururi = format_string(cfg->uriformat, index, t);
+ if (cfg->uriformat)
+ ctx->oururi = format_string(cfg->uriformat, index, t);
+ else
+ ctx->oururi = NULL;
/*
* HEAD section.
*/
htprintf(ctx, "<head>\n");
trie_getpath(t, index, path);
- htprintf(ctx, "<title>%s: ", PNAME);
+ htprintf(ctx, "<title>");
+ htescape(ctx, cfg->html_title, strlen(cfg->html_title), 0);
+ htprintf(ctx, ": ");
htescape(ctx, path, strlen(path), 0);
htprintf(ctx, "</title>\n");
htprintf(ctx, "</head>\n");
char buf[80];
if (i == 0) {
- strcpy(buf, "< ");
+ strcpy(buf, "> ");
round_and_format_age(ctx, ctx->thresholds[0], buf+5, 0);
} else if (i == MAXCOLOUR) {
- strcpy(buf, "> ");
+ strcpy(buf, "< ");
round_and_format_age(ctx, ctx->thresholds[MAXCOLOUR-1], buf+5, 0);
} else {
unsigned long long midrange =