Fix the various issues centring around the anomaly in Unix pathname syntax whereby the root directory "/" ends in a path separator

author simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Sat, 1 Nov 2008 17:05:32 +0000 (17:05 +0000)

committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Sat, 1 Nov 2008 17:05:32 +0000 (17:05 +0000)
author simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Sat, 1 Nov 2008 17:05:32 +0000 (17:05 +0000)
committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Sat, 1 Nov 2008 17:05:32 +0000 (17:05 +0000)
diff --git a/TODO b/TODO

index fe0cb71..51c00d2 100644 (file)
--- a/TODO
+++ b/TODO
@@ -25,22 +25,6 @@ Before it's non-embarrassingly releasable:
      + configurable recursive output depth
      + show the right bits last
  
- - figure out what to do about scans starting in the root directory
-    + Currently we end up with a double leading slash on the
-      pathnames, which is ugly, and we also get a zero-length href
-      in between those slashes which means the web interface doesn't
-      let you click back up to the top level at all.
-    + One big problem here is that a lot of the code assumes that
-      you can find the extent of a pathname by searching for "foo"
-      and "foo^A", trusting that anything inside the directory will
-      begin "foo/". So I'd need to consistently fix this everywhere
-      so that a trailing slash is disregarded while doing it, but
-      not actually removed.
-    + The text output gets it all wrong.
-    + The HTML output is fiddly even at the design stage: where
-      would I _ideally_ put the link to click on to get back to /?
-      It's unclear!
-
   - cross-Unix portability:
      + use autoconf
         * configure use of stat64
diff --git a/agedu.c b/agedu.c

index 246f002..a3c2db6 100644 (file)
--- a/agedu.c
+++ b/agedu.c
@@ -171,7 +171,8 @@ static void text_query(const void *mappedfile, const char *querydir,
       * (inclusive) and that filename with a ^A on the end
       * (exclusive). So find the x indices for each.
       */
-    sprintf(pathbuf, "%s\001", querydir);
+    strcpy(pathbuf, querydir);
+    make_successor(pathbuf);
      xi1 = trie_before(mappedfile, querydir);
      xi2 = trie_before(mappedfile, pathbuf);
  
@@ -196,7 +197,7 @@ static void text_query(const void *mappedfile, const char *querydir,
         while (xi1 < xi2) {
             trie_getpath(mappedfile, xi1, pathbuf);
             text_query(mappedfile, pathbuf, t, depth-1);
-           strcat(pathbuf, "\001");
+           make_successor(pathbuf);
             xi1 = trie_before(mappedfile, pathbuf);
         }
      }
diff --git a/du.c b/du.c

index da16e53..88edd27 100644 (file)
--- a/du.c
+++ b/du.c
@@ -114,7 +114,16 @@ static void du_recurse(char **path, size_t pathlen, size_t *pathsize,
             *pathsize = newpathlen * 3 / 2 + 256;
             *path = sresize(*path, *pathsize, char);
         }
-       sprintf(*path + pathlen, "/%s", names[i]);
+       /*
+        * Avoid duplicating a slash if we got a trailing one to
+        * begin with (i.e. if we're starting the scan in '/' itself).
+        */
+       if (pathlen > 0 && (*path)[pathlen-1] == '/') {
+           strcpy(*path + pathlen, names[i]);
+           newpathlen--;
+       } else {
+           sprintf(*path + pathlen, "/%s", names[i]);
+       }
  
         du_recurse(path, newpathlen, pathsize, gotdata, gotdata_ctx);
  
diff --git a/html.c b/html.c

index bc9905f..be13590 100644 (file)
--- a/html.c
+++ b/html.c
@@ -166,12 +166,14 @@ static void get_indices(const void *t, char *path,
                         unsigned long *xi1, unsigned long *xi2)
  {
      size_t pathlen = strlen(path);
+    int c1 = path[pathlen], c2 = (pathlen > 0 ? path[pathlen-1] : 0);
  
      *xi1 = trie_before(t, path);
-    path[pathlen] = '\001';
-    path[pathlen+1] = '\0';
+    make_successor(path);
      *xi2 = trie_before(t, path);
-    path[pathlen] = '\0';
+    path[pathlen] = c1;
+    if (pathlen > 0)
+       path[pathlen-1] = c2;
  }
  
  static unsigned long long fetch_size(const void *t, char *path,
@@ -393,7 +395,7 @@ char *html_query(const void *t, unsigned long index,
      struct html actx, *ctx = &actx;
      char *path, *path2, *p, *q, *href;
      char agebuf1[80], agebuf2[80];
-    size_t pathlen, hreflen;
+    size_t pathlen, subdirpos, hreflen;
      unsigned long index2;
      int i;
      struct vector **vecs;
@@ -444,13 +446,22 @@ char *html_query(const void *t, unsigned long index,
       */
      htprintf(ctx, "<p align=center>\n<code>");
      q = path;
-    for (p = strchr(path, pathsep); p; p = strchr(p+1, pathsep)) {
+    for (p = strchr(path, pathsep); p; p = strchr(p, pathsep)) {
         int doing_href = 0;
+       char c, *zp;
+
         /*
          * See if this path prefix exists in the trie. If so,
          * generate a hyperlink.
          */
-       *p = '\0';
+       zp = p;
+       if (p == path)                 /* special case for "/" at start */
+           zp++;
+
+       p++;
+
+       c = *zp;
+       *zp = '\0';
         index2 = trie_before(t, path);
         trie_getpath(t, index2, path2);
         if (!strcmp(path, path2) && cfg->format) {
@@ -458,12 +469,12 @@ char *html_query(const void *t, unsigned long index,
             htprintf(ctx, "<a href=\"%s\">", href);
             doing_href = 1;
         }
-       *p = pathsep;
-       htescape(ctx, q, p - q, 1);
-       q = p + 1;
+       *zp = c;
+       htescape(ctx, q, zp - q, 1);
         if (doing_href)
             htprintf(ctx, "</a>");
-       htescape(ctx, p, 1, 1);
+       htescape(ctx, zp, p - zp, 1);
+       q = p;
      }
      htescape(ctx, q, strlen(q), 1);
      htprintf(ctx, "</code>\n");
@@ -529,6 +540,9 @@ char *html_query(const void *t, unsigned long index,
      get_indices(t, path, &xi1, &xi2);
      xi1++;
      pathlen = strlen(path);
+    subdirpos = pathlen + 1;
+    if (pathlen > 0 && path[pathlen-1] == pathsep)
+       subdirpos--;
      while (xi1 < xi2) {
         trie_getpath(t, xi1, path2);
         get_indices(t, ctx->path2, &xj1, &xj2);
@@ -540,7 +554,7 @@ char *html_query(const void *t, unsigned long index,
             vecs = sresize(vecs, vecsize, struct vector *);
         }
         assert(strlen(path2) > pathlen);
-       vecs[nvecs] = make_vector(ctx, path2, 1, path2 + pathlen + 1);
+       vecs[nvecs] = make_vector(ctx, path2, 1, path2 + subdirpos);
         for (i = 0; i <= MAXCOLOUR; i++)
             vecs[0]->sizes[i] -= vecs[nvecs]->sizes[i];
         nvecs++;
diff --git a/trie.c b/trie.c

index acce8be..0466a34 100644 (file)
--- a/trie.c
+++ b/trie.c
@@ -617,3 +617,12 @@ off_t trie_get_index_offset(const void *t)
  {
      return ((const struct trie_header *)t)->indexroot;
  }
+
+void make_successor(char *pathbuf)
+{
+    int len = strlen(pathbuf);
+    if (len > 0 && pathbuf[len-1] == pathsep)
+       len--;
+    pathbuf[len] = '\001';
+    pathbuf[len+1] = '\0';
+}
diff --git a/trie.h b/trie.h

index 40f6f75..ef52a08 100644 (file)
--- a/trie.h
+++ b/trie.h
@@ -107,3 +107,19 @@ void triewalk_free(triewalk *tw);
   */
  void trie_set_index_offset(void *t, off_t ptr);
  off_t trie_get_index_offset(const void *t);
+
+/* ----------------------------------------------------------------------
+ * Utility functions not directly involved with the trie.
+ */
+
+/*
+ * Given a pathname in a buffer, rewrite it in place into a string
+ * which, when passed to trie_before, will reliably return the
+ * index of the thing that comes after that pathname and all its
+ * descendants. Usually this is done by suffixing ^A (since foo^A
+ * is guaranteed to be the first thing that sorts after foo and
+ * foo/stuff); however, if the pathname ends in a path separator
+ * (e.g. if it's just "/"), the ^A replaces that separator instead.
+ * The buffer needs room for one more byte than the original string.
+ */
+void make_successor(char *pathbuf);
author	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Sat, 1 Nov 2008 17:05:32 +0000 (17:05 +0000)
committer	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Sat, 1 Nov 2008 17:05:32 +0000 (17:05 +0000)
TODO		patch \| blob \| blame \| history
agedu.c		patch \| blob \| blame \| history
du.c		patch \| blob \| blame \| history
html.c		patch \| blob \| blame \| history
trie.c		patch \| blob \| blame \| history
trie.h		patch \| blob \| blame \| history