From: simon Date: Sat, 1 Nov 2008 17:05:32 +0000 (+0000) Subject: Fix the various issues centring around the anomaly in Unix pathname X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/agedu/commitdiff_plain/256c29a27ba79677a78bc676a3cc73ee1aad2d68 Fix the various issues centring around the anomaly in Unix pathname syntax that the canonical name of "/" ends in a trailing '/'. git-svn-id: svn://svn.tartarus.org/sgt/agedu@8245 cda61777-01e9-0310-a592-d414129be87e --- diff --git a/TODO b/TODO index fe0cb71..51c00d2 100644 --- a/TODO +++ b/TODO @@ -25,22 +25,6 @@ Before it's non-embarrassingly releasable: + configurable recursive output depth + show the right bits last - - figure out what to do about scans starting in the root directory - + Currently we end up with a double leading slash on the - pathnames, which is ugly, and we also get a zero-length href - in between those slashes which means the web interface doesn't - let you click back up to the top level at all. - + One big problem here is that a lot of the code assumes that - you can find the extent of a pathname by searching for "foo" - and "foo^A", trusting that anything inside the directory will - begin "foo/". So I'd need to consistently fix this everywhere - so that a trailing slash is disregarded while doing it, but - not actually removed. - + The text output gets it all wrong. - + The HTML output is fiddly even at the design stage: where - would I _ideally_ put the link to click on to get back to /? - It's unclear! - - cross-Unix portability: + use autoconf * configure use of stat64 diff --git a/agedu.c b/agedu.c index 246f002..a3c2db6 100644 --- a/agedu.c +++ b/agedu.c @@ -171,7 +171,8 @@ static void text_query(const void *mappedfile, const char *querydir, * (inclusive) and that filename with a ^A on the end * (exclusive). So find the x indices for each. 
*/ - sprintf(pathbuf, "%s\001", querydir); + strcpy(pathbuf, querydir); + make_successor(pathbuf); xi1 = trie_before(mappedfile, querydir); xi2 = trie_before(mappedfile, pathbuf); @@ -196,7 +197,7 @@ static void text_query(const void *mappedfile, const char *querydir, while (xi1 < xi2) { trie_getpath(mappedfile, xi1, pathbuf); text_query(mappedfile, pathbuf, t, depth-1); - strcat(pathbuf, "\001"); + make_successor(pathbuf); xi1 = trie_before(mappedfile, pathbuf); } } diff --git a/du.c b/du.c index da16e53..88edd27 100644 --- a/du.c +++ b/du.c @@ -114,7 +114,16 @@ static void du_recurse(char **path, size_t pathlen, size_t *pathsize, *pathsize = newpathlen * 3 / 2 + 256; *path = sresize(*path, *pathsize, char); } - sprintf(*path + pathlen, "/%s", names[i]); + /* + * Avoid duplicating a slash if we got a trailing one to + * begin with (i.e. if we're starting the scan in '/' itself). + */ + if (pathlen > 0 && (*path)[pathlen-1] == '/') { + strcpy(*path + pathlen, names[i]); + newpathlen--; + } else { + sprintf(*path + pathlen, "/%s", names[i]); + } du_recurse(path, newpathlen, pathsize, gotdata, gotdata_ctx); diff --git a/html.c b/html.c index bc9905f..be13590 100644 --- a/html.c +++ b/html.c @@ -166,12 +166,14 @@ static void get_indices(const void *t, char *path, unsigned long *xi1, unsigned long *xi2) { size_t pathlen = strlen(path); + int c1 = path[pathlen], c2 = (pathlen > 0 ? 
path[pathlen-1] : 0); *xi1 = trie_before(t, path); - path[pathlen] = '\001'; - path[pathlen+1] = '\0'; + make_successor(path); *xi2 = trie_before(t, path); - path[pathlen] = '\0'; + path[pathlen] = c1; + if (pathlen > 0) + path[pathlen-1] = c2; } static unsigned long long fetch_size(const void *t, char *path, @@ -393,7 +395,7 @@ char *html_query(const void *t, unsigned long index, struct html actx, *ctx = &actx; char *path, *path2, *p, *q, *href; char agebuf1[80], agebuf2[80]; - size_t pathlen, hreflen; + size_t pathlen, subdirpos, hreflen; unsigned long index2; int i; struct vector **vecs; @@ -444,13 +446,22 @@ char *html_query(const void *t, unsigned long index, */ htprintf(ctx, "<p align=center>\n<code>"); q = path; - for (p = strchr(path, pathsep); p; p = strchr(p+1, pathsep)) { + for (p = strchr(path, pathsep); p; p = strchr(p, pathsep)) { int doing_href = 0; + char c, *zp; + /* * See if this path prefix exists in the trie. If so, * generate a hyperlink. */ - *p = '\0'; + zp = p; + if (p == path) /* special case for "/" at start */ + zp++; + + p++; + + c = *zp; + *zp = '\0'; index2 = trie_before(t, path); trie_getpath(t, index2, path2); if (!strcmp(path, path2) && cfg->format) { @@ -458,12 +469,12 @@ char *html_query(const void *t, unsigned long index, htprintf(ctx, "<a href=\"%s\">", href); doing_href = 1; } - *p = pathsep; - htescape(ctx, q, p - q, 1); - q = p + 1; + *zp = c; + htescape(ctx, q, zp - q, 1); if (doing_href) htprintf(ctx, "</a>"); - htescape(ctx, p, 1, 1); + htescape(ctx, zp, p - zp, 1); + q = p; } htescape(ctx, q, strlen(q), 1); htprintf(ctx, "</code>\n"); @@ -529,6 +540,9 @@ char *html_query(const void *t, unsigned long index, get_indices(t, path, &xi1, &xi2); xi1++; pathlen = strlen(path); + subdirpos = pathlen + 1; + if (pathlen > 0 && path[pathlen-1] == pathsep) + subdirpos--; while (xi1 < xi2) { trie_getpath(t, xi1, path2); get_indices(t, ctx->path2, &xj1, &xj2); @@ -540,7 +554,7 @@ char *html_query(const void *t, unsigned long index, vecs = sresize(vecs, vecsize, struct vector *); } assert(strlen(path2) > pathlen); - vecs[nvecs] = make_vector(ctx, path2, 1, path2 + pathlen + 1); + vecs[nvecs] = make_vector(ctx, path2, 1, path2 + subdirpos); for (i = 0; i <= MAXCOLOUR; i++) vecs[0]->sizes[i] -= vecs[nvecs]->sizes[i]; nvecs++; diff --git a/trie.c b/trie.c index acce8be..0466a34 100644 --- a/trie.c +++ b/trie.c @@ -617,3 +617,12 @@ off_t trie_get_index_offset(const void *t) { return ((const struct trie_header *)t)->indexroot; } + +void make_successor(char *pathbuf) +{ + int len = strlen(pathbuf); + if (len > 0 && pathbuf[len-1] == pathsep) + len--; + pathbuf[len] = '\001'; + pathbuf[len+1] = '\0'; +} diff --git a/trie.h b/trie.h index 40f6f75..ef52a08 100644 --- 
a/trie.h +++ b/trie.h @@ -107,3 +107,19 @@ void triewalk_free(triewalk *tw); */ void trie_set_index_offset(void *t, off_t ptr); off_t trie_get_index_offset(const void *t); + +/* ---------------------------------------------------------------------- + * Utility functions not directly involved with the trie. + */ + +/* + * Given a pathname in a buffer, adjust the pathname in place so + * that it points at a string which, when passed to trie_before, + * will reliably return the index of the thing that comes after + * that pathname and all its descendants. Usually this is done by + * suffixing ^A (since foo^A is guaranteeably the first thing that + * sorts after foo and foo/stuff); however, if the pathname + * actually ends in a path separator (e.g. if it's just "/"), that + * must be stripped off first. + */ +void make_successor(char *pathbuf);