maxpathlen = trie_maxpathlen(mappedfile);
pathbuf = snewn(maxpathlen, char);
+ if (!querydir || !gotdepth) {
+ /*
+ * Single output file.
+ */
+ if (!querydir) {
+ cfg.uriformat = "/%|/%p/%|%|/%p";
+ } else {
+ cfg.uriformat = NULL;
+ }
+ cfg.autoage = htmlautoagerange;
+ cfg.oldest = htmloldest;
+ cfg.newest = htmlnewest;
+ cfg.showfiles = showfiles;
+ } else {
+ cfg.uriformat = "/index.html%|/%/p.html";
+ cfg.fileformat = "/index.html%|/%/p.html";
+ cfg.autoage = htmlautoagerange;
+ cfg.oldest = htmloldest;
+ cfg.newest = htmlnewest;
+ cfg.showfiles = showfiles;
+ }
+
if (!querydir) {
/*
* If we're run in --cgi mode, read PATH_INFO to get
if (!path_info)
path_info = "";
+ /*
+ * Parse the path. If PATH_INFO does not match any of the
+ * configured URI formats, answer with a 404 page whose
+ * heading agrees with the status line and the title.
+ */
+ if (!html_parse_path(mappedfile, path_info, &cfg, &xi)) {
+ printf("Status: 404\nContent-type: text/html\n\n"
+ "<html><head>"
+ "<title>404 Not Found</title>"
+ "</head><body>"
+ "<h1>404 Not Found</h1>"
+ "<p>Invalid <code>agedu</code> pathname."
+ "</body></html>\n");
+ return 0;
+ }
+
/*
- * Because we need relative links to go to the
- * right place, it's important that our
- * PATH_INFO should contain a slash right at the
- * start, and no slashes anywhere else.
+ * If the path was parseable but not canonically
+ * expressed, return a redirect to the canonical
+ * version.
*/
- if (path_info[0] != '/') {
+ char *canonpath = html_format_path(mappedfile, &cfg, xi);
+ if (strcmp(canonpath, path_info)) {
char *servername = getenv("SERVER_NAME");
char *scriptname = getenv("SCRIPT_NAME");
if (!servername || !scriptname) {
return 0;
}
printf("Status: 301\n"
- "Location: http://%s/%s/\n"
+ "Location: http://%s/%s%s\n"
"Content-type: text/html\n\n"
"<html><head>"
"<title>301 Moved</title>"
"<h1>301 Moved</h1>"
"<p>Moved."
"</body></html>\n",
- servername, scriptname);
- return 0;
- } else if (strchr(path_info+1, '/')) {
- printf("Status: 404\nContent-type: text/html\n\n"
- "<html><head>"
- "<title>404 Not Found</title>"
- "</head><body>"
- "<h1>400 Not Found</h1>"
- "<p>Invalid <code>agedu</code> pathname."
- "</body></html>\n");
+ servername, scriptname, canonpath);
return 0;
}
- xi = atoi(path_info + 1);
- if (xi >= trie_count(mappedfile)) {
- printf("Status: 404\nContent-type: text/html\n\n"
- "<html><head>"
- "<title>404 Not Found</title>"
- "</head><body>"
- "<h1>404 Not Found</h1>"
- "<p>This is not a valid pathname index."
- "</body></html>\n");
- return 0;
- } else if (!index_has_root(mappedfile, xi)) {
- printf("Status: 404\nContent-type: text/html\n\n"
- "<html><head>"
- "<title>404 Not Found</title>"
- "</head><body>"
- "<h1>404 Not Found</h1>"
- "<p>Pathname index out of range."
- "</body></html>\n");
- return 0;
- }
} else {
/*
* In ordinary --html mode, process a query
/*
* Single output file.
*/
- if (!querydir) {
- cfg.format = "%.0lu"; /* use crosslinks in --cgi mode */
- } else {
- cfg.format = NULL;
- }
- cfg.rootpage = NULL;
- cfg.autoage = htmlautoagerange;
- cfg.oldest = htmloldest;
- cfg.newest = htmlnewest;
- cfg.showfiles = showfiles;
html = html_query(mappedfile, xi, &cfg, 1);
if (querydir && outfile != NULL) {
FILE *fp = fopen(outfile, "w");
make_successor(pathbuf);
xi2 = trie_before(mappedfile, pathbuf);
- cfg.format = "%lu.html";
- cfg.rootpage = "index.html";
- cfg.autoage = htmlautoagerange;
- cfg.oldest = htmloldest;
- cfg.newest = htmlnewest;
- cfg.showfiles = showfiles;
if (html_dump(mappedfile, xi, xi2, depth, &cfg, prefix))
return 1;
}
dcfg.port = httpserverport;
dcfg.closeoneof = closeoneof;
dcfg.basicauthdata = httpauthdata;
- pcfg.format = NULL;
- pcfg.rootpage = NULL;
+ pcfg.uriformat = "/%|/%p/%|%|/%p";
pcfg.autoage = htmlautoagerange;
pcfg.oldest = htmloldest;
pcfg.newest = htmlnewest;
const void *t;
unsigned long long totalsize, oldest, newest;
char *path2;
- char *href;
+ char *oururi;
size_t hreflen;
- const char *format, *rootpage;
+ const char *uriformat;
unsigned long long thresholds[MAXCOLOUR];
char *titletexts[MAXCOLOUR+1];
time_t now;
*fmt = fmts[shift];
}
-static void make_filename(char *buf, size_t buflen,
- const char *format, const char *rootpage,
- unsigned long index)
+/*
+ * One '%|'-separated alternative taken from a URI or filename format
+ * string: some literal text, optionally one formatting directive, and
+ * some more literal text.
+ */
+struct format_option {
+    const char *prefix;        /* literal text before the directive; may include '%%' */
+    const char *suffix;        /* literal text after the directive; may include '%%' */
+    int prefixlen;             /* length of prefix, each '%%' counted as one character */
+    int suffixlen;             /* length of suffix, each '%%' counted as one character */
+    char fmttype;              /* 0 for no directive, or 'n' or 'p' */
+    int translate_pathsep;     /* is the path separator rendered as '/'? */
+    int shorten_path;          /* is the tree's common prefix omitted? */
+};
+
+/*
+ * Gets the next format option from a format string. Advances '*fmt'
+ * past it (and past the '%|' separator, if any), or sets it to NULL
+ * if nothing is left.
+ *
+ * The returned option's 'prefix' and 'suffix' point into the format
+ * string itself; 'prefixlen' and 'suffixlen' count the characters
+ * they will produce, with each '%%' pair counted once. Every field of
+ * the result is given a defined value, including the two flag fields
+ * when the option contains no formatting directive (fmttype == 0).
+ *
+ * A malformed format string (unknown directive letter, or a stray
+ * '%' in the suffix) is caught by assertion only.
+ */
+struct format_option get_format_option(const char **fmt)
+{
+    struct format_option ret;
+
+    /*
+     * Start from a fully defined result, so that the early returns
+     * below never hand back indeterminate flag fields to a caller
+     * that copies the struct.
+     */
+    ret.prefix = *fmt;
+    ret.suffix = *fmt;
+    ret.prefixlen = 0;
+    ret.suffixlen = 0;
+    ret.fmttype = '\0';
+    ret.translate_pathsep = 1;
+    ret.shorten_path = 1;
+
+    /*
+     * Scan for prefix of format.
+     */
+    while (1) {
+        if (**fmt == '\0') {
+            /*
+             * No formatting directive, and this is the last option.
+             */
+            ret.suffix = *fmt;
+            *fmt = NULL;
+            return ret;
+        } else if (**fmt == '%') {
+            if ((*fmt)[1] == '%') {
+                (*fmt) += 2;           /* '%%' yields one literal character */
+                ret.prefixlen++;
+            } else if ((*fmt)[1] == '|') {
+                /*
+                 * No formatting directive.
+                 */
+                ret.suffix = *fmt;
+                (*fmt) += 2;           /* advance to start of next option */
+                return ret;
+            } else {
+                break;                 /* start of a real directive */
+            }
+        } else {
+            (*fmt)++;                  /* normal character */
+            ret.prefixlen++;
+        }
+    }
+
+    /*
+     * Interpret formatting directive with flags. Both flags default
+     * to on; '/' and '-' switch them off.
+     */
+    (*fmt)++;
+    while (1) {
+        char c = *(*fmt)++;
+        assert(c);
+        if (c == '/') {
+            ret.translate_pathsep = 0;
+        } else if (c == '-') {
+            ret.shorten_path = 0;
+        } else {
+            assert(c == 'n' || c == 'p');
+            ret.fmttype = c;
+            break;
+        }
+    }
+
+    /*
+     * Scan for suffix.
+     */
+    ret.suffix = *fmt;
+    ret.suffixlen = 0;
+    while (1) {
+        if (**fmt == '\0') {
+            /*
+             * This is the last option.
+             */
+            *fmt = NULL;
+            return ret;
+        } else if (**fmt != '%') {
+            (*fmt)++;                  /* normal character */
+            ret.suffixlen++;
+        } else {
+            if ((*fmt)[1] == '%') {
+                (*fmt) += 2;           /* '%%' yields one literal character */
+                ret.suffixlen++;
+            } else {
+                /* only one directive per option is permitted */
+                assert((*fmt)[1] == '|');
+                (*fmt) += 2;           /* advance to start of next option */
+                return ret;
+            }
+        }
+    }
+}
+
+/*
+ * Format the tree entry 'index' of trie 't' according to the first
+ * usable option in the format string 'fmt'. For the root (index 0)
+ * that is the very first option; for any other entry it is the first
+ * option that contains a formatting directive.
+ *
+ * Returns a newly allocated string which the caller must sfree(), or
+ * NULL if 'fmt' is NULL (meaning no URIs are being generated at all).
+ * A non-NULL format with no usable option is a programming error and
+ * fails an assertion.
+ */
+char *format_string(const char *fmt, unsigned long index, const void *t)
{
- if (index == 0 && rootpage)
- snprintf(buf, buflen, "%s", rootpage);
- else
- snprintf(buf, buflen, format, index);
+    char pathsep = trie_pathsep(t);
+    int maxpathlen = trie_maxpathlen(t);
+
+    /*
+     * A NULL format is a legitimate configuration ("generate no
+     * hyperlinks"), so it must not fall through to the assertion at
+     * the bottom.
+     */
+    if (!fmt)
+        return NULL;
+
+    while (fmt) {
+        struct format_option opt = get_format_option(&fmt);
+        int maxlen;
+        char *ret, *p;
+
+        if (index && !opt.fmttype)
+            continue; /* option is only good for the root, which this isn't */
+
+        maxlen = opt.prefixlen + opt.suffixlen + 1;
+        switch (opt.fmttype) {
+          case 'n':
+            maxlen += 40;              /* generous length for an integer */
+            break;
+          case 'p':
+            maxlen += 3*maxpathlen;    /* might have to escape everything */
+            break;
+        }
+        ret = snewn(maxlen, char);
+        p = ret;
+
+        /* Copy the literal prefix, collapsing each '%%' into '%'. */
+        while (opt.prefixlen-- > 0) {
+            if ((*p++ = *opt.prefix++) == '%')
+                opt.prefix++;
+        }
+
+        switch (opt.fmttype) {
+          case 'n':
+            p += sprintf(p, "%lu", index);
+            break;
+          case 'p': {
+            char *path = snewn(1+maxpathlen, char);
+            const char *q;
+
+            if (opt.shorten_path) {
+                /*
+                 * Measure the root pathname, then skip that many
+                 * characters (plus one separator) of the real one.
+                 */
+                trie_getpath(t, 0, path);
+                q = path + strlen(path);
+                trie_getpath(t, index, path);
+                if (*q == pathsep)
+                    q++;
+            } else {
+                trie_getpath(t, index, path);
+                q = path;
+            }
+            while (*q) {
+                char c = *q++;
+                if (c == pathsep && opt.translate_pathsep)
+                    *p++ = '/';
+                else if (!isalnum((unsigned char)c) && !strchr("-.@_", c))
+                    p += sprintf(p, "=%02X", (unsigned char)c);
+                else
+                    *p++ = c;
+            }
+            sfree(path);
+            break;
+          }
+        }
+
+        /* Copy the literal suffix, collapsing each '%%' into '%'. */
+        while (opt.suffixlen-- > 0) {
+            if ((*p++ = *opt.suffix++) == '%')
+                opt.suffix++;
+        }
+        *p = '\0';
+        assert(p - ret < maxlen);
+        return ret;
+    }
+    assert(!"Getting here implies an incomplete set of formats");
+    return NULL;   /* keep NDEBUG builds from running off the end */
+}
+
+/*
+ * Public entry point for URI generation: formats the tree entry
+ * 'index' using cfg->uriformat by handing straight off to
+ * format_string(). The result is whatever format_string() returns.
+ */
+char *html_format_path(const void *t, const struct html_config *cfg,
+ unsigned long index)
+{
+ return format_string(cfg->uriformat, index, t);
+}
+
+/*
+ * Parse the URI pathname fragment 'path' against each option of
+ * cfg->uriformat in turn, accepting the first one that matches.
+ *
+ * On success, stores the matching tree index in '*index' and returns
+ * 1. Returns 0, leaving '*index' untouched, if no option matches, if
+ * a numeric index is out of range or overflows, or if the entry
+ * found is not something index_has_root() accepts. A NULL uriformat
+ * matches nothing.
+ */
+int html_parse_path(const void *t, const char *path,
+                    const struct html_config *cfg, unsigned long *index)
+{
+    int len = strlen(path);
+    char pathsep = trie_pathsep(t);
+    const char *fmt = cfg->uriformat;
+
+    while (fmt) {
+        struct format_option opt = get_format_option(&fmt);
+        const char *p, *q;
+        int midlen;
+
+        /*
+         * Check prefix and suffix.
+         */
+        midlen = len - opt.prefixlen - opt.suffixlen;
+        if (midlen < 0)
+            continue;                  /* prefix and suffix don't even fit */
+
+        p = path;
+        while (opt.prefixlen > 0) {
+            char c = *opt.prefix++;
+            if (c == '%')
+                opt.prefix++;          /* '%%' stands for a single '%' */
+            if (*p != c)
+                break;
+            p++;
+            opt.prefixlen--;
+        }
+        if (opt.prefixlen > 0)
+            continue;                  /* prefix didn't match */
+
+        q = path + len - opt.suffixlen;
+        while (opt.suffixlen > 0) {
+            char c = *opt.suffix++;
+            if (c == '%')
+                opt.suffix++;          /* '%%' stands for a single '%' */
+            if (*q != c)
+                break;
+            q++;
+            opt.suffixlen--;
+        }
+        if (opt.suffixlen > 0)
+            continue;                  /* suffix didn't match */
+
+        /*
+         * Check the data in between. p points at it, and it's midlen
+         * characters long.
+         */
+        if (opt.fmttype == '\0') {
+            if (midlen == 0) {
+                /*
+                 * Successful match against a root format.
+                 */
+                *index = 0;
+                return 1;
+            }
+        } else if (opt.fmttype == 'n') {
+            unsigned long value = 0;
+
+            while (midlen > 0 && *p >= '0' && *p <= '9') {
+                unsigned long digit = *p - '0';
+                /* refuse numbers that would wrap round to a valid index */
+                if (value > ((unsigned long)-1 - digit) / 10)
+                    break;
+                value = value * 10 + digit;
+                midlen--;
+                p++;
+            }
+            if (midlen > 0)
+                continue;              /* non-digit or overflow */
+
+            /*
+             * A syntactically valid number still has to name a real
+             * entry, exactly as a pathname match does below.
+             */
+            if (value >= trie_count(t))
+                continue;              /* index out of range */
+            if (!index_has_root(t, value))
+                continue;              /* index is not a directory */
+
+            /*
+             * Successful match against a numeric format.
+             */
+            *index = value;
+            return 1;
+        } else {
+            assert(opt.fmttype == 'p');
+
+            /*
+             * The decode buffer's size depends on the length of the
+             * incoming URI, so it lives on the heap rather than in a
+             * variable-length array on the stack.
+             */
+            int maxoutlen = trie_maxpathlen(t) + 1;
+            int maxinlen = midlen + 1;
+            char *triepath = snewn(maxinlen + maxoutlen, char);
+            char *r;
+            unsigned long gotidx = 0;
+            int matched = 0;
+
+            if (opt.shorten_path) {
+                /* put back the common prefix that the formatter removed */
+                trie_getpath(t, 0, triepath);
+                r = triepath + strlen(triepath);
+                if (r > triepath && r[-1] != pathsep)
+                    *r++ = pathsep;
+            } else {
+                r = triepath;
+            }
+
+            while (midlen > 0) {
+                if (*p == '/' && opt.translate_pathsep) {
+                    *r++ = pathsep;
+                    p++;
+                    midlen--;
+                } else if (*p == '=') {
+                    char x[3];
+                    unsigned cval;
+
+                    if (midlen < 3 ||
+                        !isxdigit((unsigned char)p[1]) ||
+                        !isxdigit((unsigned char)p[2]))
+                        break;         /* faulty escape encoding */
+                    x[0] = p[1];
+                    x[1] = p[2];
+                    x[2] = '\0';
+                    sscanf(x, "%x", &cval);
+                    *r++ = cval;
+                    p += 3;
+                    midlen -= 3;
+                } else {
+                    *r++ = *p;
+                    p++;
+                    midlen--;
+                }
+            }
+
+            if (midlen == 0) {
+                assert(r - triepath < maxinlen+maxoutlen);
+                *r = '\0';
+
+                gotidx = trie_before(t, triepath);
+                if (gotidx < trie_count(t)) {
+                    char *retpath = snewn(1+maxoutlen, char);
+                    trie_getpath(t, gotidx, retpath);
+                    /* need the exact path, and it must be a directory */
+                    matched = (strcmp(triepath, retpath) == 0 &&
+                               index_has_root(t, gotidx));
+                    sfree(retpath);
+                }
+            }
+            sfree(triepath);
+
+            if (matched) {
+                /*
+                 * Successful path-based match.
+                 */
+                *index = gotidx;
+                return 1;
+            }
+        }
+    }
+
+    return 0;                          /* no match from any format option */
+}
+
+/*
+ * Build a relative href that leads from the page at URI 'source' to
+ * the page at URI 'target'. Both must begin with '/'. The result is
+ * newly allocated and consists of zero or more "../" steps followed
+ * by whatever part of 'target' lies beyond the directory components
+ * the two URIs share; if that would be empty, "./" is returned.
+ */
+char *make_href(const char *source, const char *target)
+{
+    const char *lastslash, *sp, *tp;
+    int ups, total;
+    char *href, *out;
+
+    /*
+     * Relative links can only be computed reliably between two
+     * absolute URIs (we may be hanging off the end of some CGI
+     * pathname whose last component this function cannot see).
+     */
+    assert(*source == '/');
+    assert(*target == '/');
+
+    /*
+     * The directory of the source page is everything up to its final
+     * slash; the href is relative to that.
+     */
+    lastslash = strrchr(source, '/');
+    assert(lastslash != NULL);
+
+    /*
+     * Walk both URIs together while they agree, going no further
+     * than just past that final slash.
+     */
+    for (sp = source, tp = target; sp <= lastslash && *sp == *tp; sp++, tp++)
+        ;
+
+    /*
+     * Only whole shared path components count, so retreat to just
+     * after the most recent slash.
+     */
+    while (sp > source && sp[-1] != '/') {
+        sp--;
+        tp--;
+    }
+    assert(sp > source);
+
+    /*
+     * Each remaining slash in the source directory is one level we
+     * have to climb out of.
+     */
+    ups = 0;
+    for (; sp <= lastslash; sp++) {
+        if (*sp == '/')
+            ups++;
+    }
+
+    total = 3*ups + strlen(tp);
+    if (total == 0) {
+        /* Nothing to climb and nothing to append: same directory. */
+        return dupstr("./");
+    }
+
+    href = snewn(total+1, char);
+    out = href;
+    for (; ups > 0; ups--) {
+        memcpy(out, "../", 3);
+        out += 3;
+    }
+    strcpy(out, tp);
+    return href;
}
#define PIXEL_SIZE 600 /* FIXME: configurability? */
if (vec->name) {
int doing_href = 0;
- if (ctx->format && vec->want_href) {
- make_filename(ctx->href, ctx->hreflen,
- ctx->format, ctx->rootpage,
- vec->index);
- htprintf(ctx, "<a href=\"%s\">", ctx->href);
+ if (ctx->uriformat && vec->want_href) {
+ char *targeturi = format_string(ctx->uriformat, vec->index,
+ ctx->t);
+ char *link = make_href(ctx->oururi, targeturi);
+ htprintf(ctx, "<a href=\"%s\">", link);
+ sfree(link);
+ sfree(targeturi);
doing_href = 1;
}
if (vec->literal)
const struct html_config *cfg, int downlink)
{
struct html actx, *ctx = &actx;
- char *path, *path2, *p, *q, *href;
+ char *path, *path2, *p, *q;
char agebuf1[80], agebuf2[80];
- size_t pathlen, subdirpos, hreflen;
+ size_t pathlen, subdirpos;
unsigned long index2;
int i;
struct vector **vecs;
ctx->buf = NULL;
ctx->buflen = ctx->bufsize = 0;
ctx->t = t;
- ctx->format = cfg->format;
- ctx->rootpage = cfg->rootpage;
+ ctx->uriformat = cfg->uriformat;
htprintf(ctx, "<html>\n");
path = snewn(1+trie_maxpathlen(t), char);
ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char);
- if (cfg->format) {
- hreflen = strlen(cfg->format) + 100;
- href = snewn(hreflen, char);
- } else {
- hreflen = 0;
- href = NULL;
- }
- ctx->hreflen = hreflen;
- ctx->href = href;
+ ctx->oururi = format_string(cfg->uriformat, index, t);
/*
* HEAD section.
*zp = '\0';
index2 = trie_before(t, path);
trie_getpath(t, index2, path2);
- if (!strcmptrailingpathsep(path, path2) && cfg->format) {
- make_filename(href, hreflen, cfg->format, cfg->rootpage, index2);
- if (!*href) /* special case that we understand */
- strcpy(href, "./");
- htprintf(ctx, "<a href=\"%s\">", href);
+ if (!strcmptrailingpathsep(path, path2) && cfg->uriformat) {
+ char *targeturi = format_string(cfg->uriformat, index2, t);
+ char *link = make_href(ctx->oururi, targeturi);
+ htprintf(ctx, "<a href=\"%s\">", link);
+ sfree(link);
+ sfree(targeturi);
doing_href = 1;
}
*zp = c;
*/
htprintf(ctx, "</body>\n");
htprintf(ctx, "</html>\n");
- sfree(href);
+ sfree(ctx->oururi);
sfree(path2);
sfree(path);
for (i = 0; i < nvecs; i++) {
/*
* Determine the filename for this file.
*/
- assert(cfg->format != NULL);
- int prefixlen = strlen(pathprefix);
- int fnmax = strlen(pathprefix) + strlen(cfg->format) + 100;
- char filename[fnmax];
- strcpy(filename, pathprefix);
- make_filename(filename + prefixlen, fnmax - prefixlen,
- cfg->format, cfg->rootpage, index);
+ assert(cfg->fileformat != NULL);
+ char *filename = format_string(cfg->fileformat, index, t);
+ char *path = dupfmt("%s%s", pathprefix, filename);
+ sfree(filename);
/*
* Create the HTML itself. Don't write out downlinks from our
/*
* Write it out.
*/
- FILE *fp = fopen(filename, "w");
+ FILE *fp = fopen(path, "w");
if (!fp) {
- fprintf(stderr, "%s: %s: open: %s\n", PNAME,
- filename, strerror(errno));
+ fprintf(stderr, "%s: %s: open: %s\n", PNAME, path, strerror(errno));
return 1;
}
if (fputs(html, fp) < 0) {
- fprintf(stderr, "%s: %s: write: %s\n", PNAME,
- filename, strerror(errno));
+ fprintf(stderr, "%s: %s: write: %s\n", PNAME, path, strerror(errno));
fclose(fp);
return 1;
}
if (fclose(fp) < 0) {
- fprintf(stderr, "%s: %s: fclose: %s\n", PNAME,
- filename, strerror(errno));
+ fprintf(stderr, "%s: %s: fclose: %s\n", PNAME, path, strerror(errno));
return 1;
}
+ sfree(path);
/*
* Recurse.
if (maxdepth != 0) {
unsigned long subindex, subendindex;
int newdepth = (maxdepth > 0 ? maxdepth - 1 : maxdepth);
- char path[1+trie_maxpathlen(t)];
+ char rpath[1+trie_maxpathlen(t)];
index++;
while (index < endindex) {
- trie_getpath(t, index, path);
- get_indices(t, path, &subindex, &subendindex);
+ trie_getpath(t, index, rpath);
+ get_indices(t, rpath, &subindex, &subendindex);
index = subendindex;
if (subendindex - subindex > 1) {
if (html_dump(t, subindex, subendindex, newdepth,
struct html_config {
/*
- * If "format" is non-NULL, it is treated as an sprintf format
- * string which must contain exactly one %lu and no other
- * formatting directives (other than %%, which doesn't count);
- * this will be used to construct URLs to use in hrefs
- * pointing to queries of other related (parent and child)
- * pathnames.
+ * Configure the format of the URI pathname fragment corresponding
+ * to a given tree entry.
+ *
+ * 'uriformat' is expected to have the following format:
+ * - it consists of one or more _options_, each indicating a
+ * particular way to format a URI, separated by '%|'
+ * - each option contains _at most one_ formatting directive;
+ * without any, it is assumed to only be able to encode the
+ * root tree entry
+ * - the formatting directive may be preceded and/or followed by
+ * literal text; percent signs in that literal text are
+ * specified as %% (which doesn't count as a formatting
+ * directive for the 'at most one' rule)
+ * - formatting directives are as follows:
+ * + '%n' outputs the numeric index (in decimal) of the tree
+ * entry
+ * + '%p' outputs the pathname of the tree entry, not counting
+ * any common prefix of the whole tree or a subdirectory
+ * separator following that (so that the root directory of
+ * the tree will always be rendered as the empty string).
+ * The subdirectory separator is translated into '/'; any
+ * remotely worrying character is escaped as = followed by
+ * two hex digits (including, in particular, = itself). The
+ * only characters not escaped are the ASCII alphabets and
+ * numbers, the subdirectory separator as mentioned above,
+ * and the four punctuation characters -.@_ .
+ * - '%/p' outputs the pathname of the tree entry, but this time
+ * the subdirectory separator is also considered to be a
+ * worrying character and is escaped.
+ * - '%-p' and '%-/p' are like '%p' and '%/p' respectively,
+ * except that they use the full pathname stored in the tree
+ * without stripping a common prefix.
+ *
+ * These formats are used both for generating and parsing URI
+ * fragments. When generating, the first valid option is used
+ * (which is always the very first one if we're generating the
+ * root URI, or else it's the first option with any formatting
+ * directive); when parsing, the first option that matches will be
+ * accepted. (Thus, you can have '.../subdir' and '.../subdir/'
+ * both accepted, but make the latter canonical; clients of this
+ * mechanism will typically regenerate a URI string after parsing
+ * an index out of it, and return an HTTP redirect if it isn't in
+ * canonical form.)
+ *
+ * All hyperlinks should be correctly generated as relative (i.e.
+ * with the right number of ../ and ./ considering both the
+ * pathname for the page currently being generated, and the one
+ * for the link target).
+ *
+ * If 'uriformat' is NULL, the HTML is generated without hyperlinks.
*/
- const char *format;
+ const char *uriformat;
/*
- * If "rootpage" is non-NULL, it overrides "format" to give a
- * special name (e.g. "index.html") to the top-level page of the
- * index.
+ * Configure the filenames output by html_dump(). These can be
+ * configured separately from the URI formats, so that the root
+ * file can be called index.html on disk but have a notional URI
+ * of just / or similar.
+ *
+ * Formatting directives are the same as the uriformat above.
*/
- const char *rootpage;
+ const char *fileformat;
/*
* Time stamps to assign to the extreme ends of the colour
};
/*
+ * Parse a URI pathname segment against the URI formats specified in
+ * 'cfg', and return a numeric index in '*index'. Return value is true
+ * on success, or false if the pathname makes no sense, or the index
+ * is out of range, or the index does not correspond to a directory in
+ * the trie.
+ *
+ * The options in cfg->uriformat are tried in order and the first one
+ * that matches is accepted, so a non-canonical spelling can parse
+ * successfully; callers compare against html_format_path() to detect
+ * that. '*index' should only be relied on when the return value is
+ * true.
+ */
+int html_parse_path(const void *t, const char *path,
+ const struct html_config *cfg, unsigned long *index);
+
+/*
+ * Generate a URI pathname segment from an index, using the
+ * 'uriformat' field of 'cfg'. The returned string is dynamically
+ * allocated; the caller is expected to free it with sfree().
+ */
+char *html_format_path(const void *t, const struct html_config *cfg,
+ unsigned long index);
+
+/*
* Generate an HTML document containing the results of a query
* against the pathname at a given index. Returns a dynamically
* allocated piece of memory containing the entire HTML document,
"This is a restricted-access set of pages.");
}
} else {
- char *q;
p = ctx->url;
- p += strspn(p, "/?");
- index = strtoul(p, &q, 10);
- if (*q) {
+ if (!html_parse_path(ctx->t, p, cfg, &index)) {
ret = http_error("404", "Not Found", NULL,
- "This is not a valid pathname index.");
+ "This is not a valid pathname.");
} else {
- document = html_query(ctx->t, index, cfg, 1);
- if (document) {
- ret = http_success("text/html", 1, document);
- sfree(document);
- } else {
- ret = http_error("404", "Not Found", NULL,
- "Pathname index out of range.");
- }
+ char *canonpath = html_format_path(ctx->t, cfg, index);
+ if (!strcmp(canonpath, p)) {
+ /*
+ * This is a canonical path. Return the document.
+ */
+ document = html_query(ctx->t, index, cfg, 1);
+ if (document) {
+ ret = http_success("text/html", 1, document);
+ sfree(document);
+ } else {
+ ret = http_error("404", "Not Found", NULL,
+ "This is not a valid pathname.");
+ }
+ } else {
+ /*
+ * This is a non-canonical path. Return a redirect
+ * to the right one.
+ *
+ * To do this, we must search the request headers
+ * for Host:, to see what the client thought it
+ * was calling our server.
+ */
+
+ char *host = NULL;
+ q = ctx->data + ctx->datalen;
+ for (p = ctx->headers; p < q; p++) {
+ const char *hdr = "Host:";
+ int i;
+ for (i = 0; hdr[i]; i++) {
+ if (p >= q || tolower((unsigned char)*p) !=
+ tolower((unsigned char)hdr[i]))
+ break;
+ p++;
+ }
+ if (!hdr[i])
+ break; /* found our header */
+ p = memchr(p, '\n', q - p);
+ if (!p)
+ p = q;
+ }
+ if (p < q) {
+ while (p < q && isspace((unsigned char)*p))
+ p++;
+ r = p;
+ while (p < q) {
+ if (*p == '\r' && (p+1 >= q || p[1] == '\n'))
+ break;
+ p++;
+ }
+ host = snewn(p-r+1, char);
+ memcpy(host, r, p-r);
+ host[p-r] = '\0';
+ }
+ if (host) {
+ char *header = dupfmt("Location: http://%s%s\r\n",
+ host, canonpath);
+ ret = http_error("301", "Moved", header,
+ "This is not the canonical form of"
+ " this pathname.");
+ sfree(header);
+ } else {
+ ret = http_error("400", "Bad Request", NULL,
+ "Needed a Host: header to return"
+ " the intended redirection.");
+ }
+ }
+ sfree(canonpath);
}
}
return ret;
socklen_t addrlen;
struct html_config cfg = *incfg;
- cfg.format = "%.0lu";
-
/*
* Establish the listening socket and retrieve its port
* number.