Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / agedu.c
diff --git a/agedu.c b/agedu.c
index a8269a9..1847e2c 100644 (file)
--- a/agedu.c
+++ b/agedu.c
@@ -309,6 +309,8 @@ static void text_query(const void *mappedfile, const char *querydir,
        HELPARG("directory") HELPOPT("scan only, generating a dump") \
     VAL(HTML) SHORT(H) LONG(html) \
        HELPARG("subdir") HELPOPT("print an HTML report on a subdirectory") \
+    NOVAL(CGI) LONG(cgi) \
+        HELPOPT("do the right thing when run from a CGI script") \
     HELPPFX("options") \
     VAL(DATAFILE) SHORT(f) LONG(file) \
         HELPARG("filename") HELPOPT("[most modes] specify index file") \
@@ -339,6 +341,8 @@ static void text_query(const void *mappedfile, const char *querydir,
         HELPOPT("[--scan,--load] keep real atimes on directories") \
     NOVAL(NODIRATIME) LONG(no_dir_atime) LONG(no_dir_atimes) \
         HELPOPT("[--scan,--load] fake atimes on directories") \
+    NOVAL(NOEOF) LONG(no_eof) LONG(noeof) \
+        HELPOPT("[--web] do not close web server on EOF") \
     NOVAL(MTIME) LONG(mtime) \
         HELPOPT("[--scan] use mtime instead of atime") \
     NOVAL(SHOWFILES) LONG(files) \
@@ -357,6 +361,8 @@ static void text_query(const void *mappedfile, const char *querydir,
         HELPARG("filename") HELPOPT("[--web] read HTTP Basic user/pass from file") \
     VAL(AUTHFD) LONG(auth_fd) \
         HELPARG("fd") HELPOPT("[--web] read HTTP Basic user/pass from fd") \
+    VAL(HTMLTITLE) LONG(title) \
+        HELPARG("title") HELPOPT("[--web,--html] title prefix for web pages") \
     VAL(DEPTH) SHORT(d) LONG(depth) LONG(max_depth) LONG(maximum_depth) \
         HELPARG("levels") HELPOPT("[--text,--html] recurse to this many levels") \
     VAL(MINAGE) SHORT(a) LONG(age) LONG(min_age) LONG(minimum_age) \
@@ -391,8 +397,8 @@ enum { OPTIONS(IGNORE,IGNORE,IGNORE,LONGTMP) NLONGOPTS };
 static const int opthasval[NOPTIONS] = {OPTIONS(ZERO,ONE,IGNORE,IGNORE)};
 static const char shortopts[] = {OPTIONS(IGNORE,IGNORE,STRINGNOCOMMA,IGNORE)};
 static const char *const longopts[] = {OPTIONS(IGNORE,IGNORE,IGNORE,STRING)};
-enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) };
-enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) };
+enum { OPTIONS(SHORTNEWOPT,SHORTNEWOPT,SHORTTHISOPT,IGNORE) UNUSEDENUMVAL1 };
+enum { OPTIONS(LONGNEWOPT,LONGNEWOPT,IGNORE,LONGTHISOPT) UNUSEDENUMVAL2 };
 static const int shortvals[] = {OPTIONS(IGNORE,IGNORE,SHORTOPTVAL,IGNORE)};
 static const int longvals[] = {OPTIONS(IGNORE,IGNORE,IGNORE,LONGOPTVAL)};
 
@@ -496,10 +502,11 @@ int main(int argc, char **argv)
     time_t now = time(NULL);
     time_t textcutoff = now, htmlnewest = now, htmloldest = now;
     int htmlautoagerange = 1;
-    const char *httpserveraddr = NULL;
-    int httpserverport = 0;
+    const char *httpserveraddr = "localhost";
+    const char *httpserverport = NULL;
     const char *httpauthdata = NULL;
     const char *outfile = NULL;
+    const char *html_title = PNAME;
     int auth = HTTPD_AUTH_MAGIC | HTTPD_AUTH_BASIC;
     int progress = 1;
     struct inclusion_exclusion *inex = NULL;
@@ -508,6 +515,7 @@ int main(int argc, char **argv)
     int depth = -1, gotdepth = 0;
     int fakediratimes = 1;
     int mtime = 0;
+    int closeoneof = 1;
     int showfiles = 0;
 
 #ifdef DEBUG_MAD_OPTION_PARSING_MACROS
@@ -662,8 +670,6 @@ int main(int argc, char **argv)
 
                        for (i = 0; licence[i]; i++)
                            fputs(licence[i], stdout);
-
-                       return 0;
                    }
                    return 0;
                  case OPT_SCAN:
@@ -712,12 +718,14 @@ int main(int argc, char **argv)
                    nactions++;
                    break;
                  case OPT_HTML:
+                 case OPT_CGI:
                    if (nactions >= actionsize) {
                        actionsize = nactions * 3 / 2 + 16;
                        actions = sresize(actions, actionsize, struct action);
                    }
                    actions[nactions].mode = HTML;
-                   actions[nactions].arg = optval;
+                   actions[nactions].arg = (optid == OPT_HTML ? optval :
+                                            NULL);
                    nactions++;
                    break;
                  case OPT_HTTPD:
@@ -765,6 +773,9 @@ int main(int argc, char **argv)
                  case OPT_MTIME:
                    mtime = 1;
                    break;
+                  case OPT_NOEOF:
+                    closeoneof = 0;
+                    break;
                  case OPT_DATAFILE:
                    filename = optval;
                    break;
@@ -783,6 +794,9 @@ int main(int argc, char **argv)
                  case OPT_OUTFILE:
                    outfile = optval;
                    break;
+                  case OPT_HTMLTITLE:
+                    html_title = optval;
+                    break;
                  case OPT_MINAGE:
                    textcutoff = parse_age(now, optval);
                    break;
@@ -807,10 +821,13 @@ int main(int argc, char **argv)
                        else
                            port = optval;
                        port += strcspn(port, ":");
-                       if (port)
+                       if (port && *port)
                            *port++ = '\0';
-                       httpserveraddr = optval;
-                       httpserverport = atoi(port);
+                        if (!strcmp(optval, "ANY"))
+                            httpserveraddr = NULL;
+                        else
+                            httpserveraddr = optval;
+                       httpserverport = port;
                    }
                    break;
                  case OPT_AUTH:
@@ -1163,9 +1180,6 @@ int main(int argc, char **argv)
                        triewalk_rebase(tw, mappedfile);
                        diff = (const unsigned char *)mappedfile -
                            (const unsigned char *)oldfile;
-                       if (prevtf)
-                           prevtf = (const struct trie_file *)
-                               (((const unsigned char *)prevtf) + diff);
                        if (tf)
                            tf = (const struct trie_file *)
                                (((const unsigned char *)tf) + diff);
@@ -1232,7 +1246,8 @@ int main(int argc, char **argv)
                indexbuild_free(ib);
 
                munmap(mappedfile, totalsize);
-               ftruncate(fd, realsize);
+               if (ftruncate(fd, realsize) < 0)
+                    fatal("%s: truncate: %s\n", filename, strerror(errno));
                close(fd);
                printf("Final index file size = %llu bytes\n",
                       (unsigned long long)realsize);
@@ -1257,6 +1272,11 @@ int main(int argc, char **argv)
                perror(PNAME ": mmap");
                return 1;
            }
+            if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               return 1;
+            }
            pathsep = trie_pathsep(mappedfile);
 
            /*
@@ -1296,16 +1316,63 @@ int main(int argc, char **argv)
            if (fd < 0) {
                fprintf(stderr, "%s: %s: open: %s\n", PNAME, filename,
                        strerror(errno));
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
                return 1;
            }
            if (fstat(fd, &st) < 0) {
-               perror(PNAME ": fstat");
+               fprintf(stderr, "%s: %s: fstat: %s\n", PNAME, filename,
+                       strerror(errno));
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
                return 1;
            }
            totalsize = st.st_size;
            mappedfile = mmap(NULL, totalsize, PROT_READ, MAP_SHARED, fd, 0);
            if (!mappedfile) {
-               perror(PNAME ": mmap");
+               fprintf(stderr, "%s: %s: mmap: %s\n", PNAME, filename,
+                       strerror(errno));
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
+               return 1;
+           }
+           if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               if (!querydir) {
+                   printf("Status: 500\nContent-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>500 Internal Server Error</title>"
+                          "</head><body>"
+                          "<h1>500 Internal Server Error</h1>"
+                          "<p><code>agedu</code> suffered an internal error."
+                          "</body></html>\n");
+                   return 0;
+               }
                return 1;
            }
            pathsep = trie_pathsep(mappedfile);
@@ -1313,38 +1380,130 @@ int main(int argc, char **argv)
            maxpathlen = trie_maxpathlen(mappedfile);
            pathbuf = snewn(maxpathlen, char);
 
-           /*
-            * Trim trailing slash, just in case.
-            */
-           pathlen = strlen(querydir);
-           if (pathlen > 0 && querydir[pathlen-1] == pathsep)
-               querydir[--pathlen] = '\0';
-
-           xi = trie_before(mappedfile, querydir);
-           if (xi >= trie_count(mappedfile) ||
-               (trie_getpath(mappedfile, xi, pathbuf),
-                strcmp(pathbuf, querydir))) {
-               fprintf(stderr, "%s: pathname '%s' does not exist in index\n"
-                       "%*s(check it is spelled exactly as it is in the "
-                       "index, including\n%*sany leading './')\n",
-                       PNAME, querydir,
-                       (int)(1+sizeof(PNAME)), "",
-                       (int)(1+sizeof(PNAME)), "");
-           } else if (!index_has_root(mappedfile, xi)) {
-               fprintf(stderr, "%s: pathname '%s' is"
-                       " a file, not a directory\n", PNAME, querydir);
-           } else if (!gotdepth) {
+           if (!querydir || !gotdepth) {
                /*
                 * Single output file.
                 */
-               cfg.format = NULL;
-               cfg.rootpage = NULL;
+               if (!querydir) {
+                    cfg.uriformat = "/%|/%p/%|%|/%p";
+               } else {
+                   cfg.uriformat = NULL;
+               }
                cfg.autoage = htmlautoagerange;
                cfg.oldest = htmloldest;
                cfg.newest = htmlnewest;
                cfg.showfiles = showfiles;
-               html = html_query(mappedfile, xi, &cfg, 0);
-               if (outfile != NULL) {
+           } else {
+                cfg.uriformat = "/index.html%|/%/p.html";
+                cfg.fileformat = "/index.html%|/%/p.html";
+               cfg.autoage = htmlautoagerange;
+               cfg.oldest = htmloldest;
+               cfg.newest = htmlnewest;
+               cfg.showfiles = showfiles;
+           }
+            cfg.html_title = html_title;
+
+           if (!querydir) {
+               /*
+                * If we're run in --cgi mode, read PATH_INFO to get
+                * a numeric pathname index.
+                */
+               char *path_info = getenv("PATH_INFO");
+
+               if (!path_info)
+                   path_info = "";
+
+                /*
+                 * Parse the path.
+                 */
+                if (!html_parse_path(mappedfile, path_info, &cfg, &xi)) {
+                  printf("Status: 404\nContent-type: text/html\n\n"
+                         "<html><head>"
+                         "<title>404 Not Found</title>"
+                         "</head><body>"
+                         "<h1>404 Not Found</h1>"
+                         "<p>Invalid <code>agedu</code> pathname."
+                         "</body></html>\n");
+                   return 0;
+               }
+
+               /*
+                * If the path was parseable but not canonically
+                * expressed, return a redirect to the canonical
+                * version.
+                */
+                char *canonpath = html_format_path(mappedfile, &cfg, xi);
+               if (strcmp(canonpath, path_info)) {
+                   char *servername = getenv("SERVER_NAME");
+                   char *scriptname = getenv("SCRIPT_NAME");
+                   if (!servername || !scriptname) {
+                       if (servername)
+                           fprintf(stderr, "%s: SCRIPT_NAME unset\n", PNAME);
+                       else if (scriptname)
+                           fprintf(stderr, "%s: SERVER_NAME unset\n", PNAME);
+                       else
+                           fprintf(stderr, "%s: SERVER_NAME and "
+                                   "SCRIPT_NAME both unset\n", PNAME);
+                       printf("Status: 500\nContent-type: text/html\n\n"
+                              "<html><head>"
+                              "<title>500 Internal Server Error</title>"
+                              "</head><body>"
+                              "<h1>500 Internal Server Error</h1>"
+                              "<p><code>agedu</code> suffered an internal "
+                              "error."
+                              "</body></html>\n");
+                       return 0;
+                   }
+                   printf("Status: 301\n"
+                          "Location: http://%s/%s%s\n"
+                          "Content-type: text/html\n\n"
+                          "<html><head>"
+                          "<title>301 Moved</title>"
+                          "</head><body>"
+                          "<h1>301 Moved</h1>"
+                          "<p>Moved."
+                          "</body></html>\n",
+                          servername, scriptname, canonpath);
+                   return 0;
+               }
+
+           } else {
+               /*
+                * In ordinary --html mode, process a query
+                * directory passed in on the command line.
+                */
+
+               /*
+                * Trim trailing slash, just in case.
+                */
+               pathlen = strlen(querydir);
+               if (pathlen > 0 && querydir[pathlen-1] == pathsep)
+                   querydir[--pathlen] = '\0';
+
+               xi = trie_before(mappedfile, querydir);
+               if (xi >= trie_count(mappedfile) ||
+                   (trie_getpath(mappedfile, xi, pathbuf),
+                    strcmp(pathbuf, querydir))) {
+                   fprintf(stderr, "%s: pathname '%s' does not exist in index\n"
+                           "%*s(check it is spelled exactly as it is in the "
+                           "index, including\n%*sany leading './')\n",
+                           PNAME, querydir,
+                           (int)(1+sizeof(PNAME)), "",
+                           (int)(1+sizeof(PNAME)), "");
+                   return 1;
+               } else if (!index_has_root(mappedfile, xi)) {
+                   fprintf(stderr, "%s: pathname '%s' is"
+                           " a file, not a directory\n", PNAME, querydir);
+                   return 1;
+               }
+           }
+
+           if (!querydir || !gotdepth) {
+               /*
+                * Single output file.
+                */
+               html = html_query(mappedfile, xi, &cfg, 1);
+               if (querydir && outfile != NULL) {
                    FILE *fp = fopen(outfile, "w");
                    if (!fp) {
                        fprintf(stderr, "%s: %s: open: %s\n", PNAME,
@@ -1361,6 +1520,9 @@ int main(int argc, char **argv)
                        return 1;
                    }
                } else {
+                   if (!querydir) {
+                       printf("Content-type: text/html\n\n");
+                   }
                    fputs(html, stdout);
                }
            } else {
@@ -1380,15 +1542,15 @@ int main(int argc, char **argv)
                    snprintf(prefix, dirlen, "./");
 
                unsigned long xi2;
+               /*
+                * pathbuf is only set up in the plain-HTML case and
+                * not in the CGI case; but that's OK, because the
+                * CGI case can't come to this branch of the if
+                * anyway.
+                */
                make_successor(pathbuf);
                xi2 = trie_before(mappedfile, pathbuf);
 
-               cfg.format = "%lu.html";
-               cfg.rootpage = "index.html";
-               cfg.autoage = htmlautoagerange;
-               cfg.oldest = htmloldest;
-               cfg.newest = htmlnewest;
-               cfg.showfiles = showfiles;
                if (html_dump(mappedfile, xi, xi2, depth, &cfg, prefix))
                    return 1;
            }
@@ -1415,6 +1577,11 @@ int main(int argc, char **argv)
                perror(PNAME ": mmap");
                return 1;
            }
+            if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               return 1;
+            }
            pathsep = trie_pathsep(mappedfile);
 
            maxpathlen = trie_maxpathlen(mappedfile);
@@ -1447,17 +1614,23 @@ int main(int argc, char **argv)
                perror(PNAME ": mmap");
                return 1;
            }
+            if (!trie_check_magic(mappedfile)) {
+               fprintf(stderr, "%s: %s: magic numbers did not match\n"
+                        "%s: check that the index was built by this version of agedu on this platform\n", PNAME, filename, PNAME);
+               return 1;
+            }
            pathsep = trie_pathsep(mappedfile);
 
            dcfg.address = httpserveraddr;
            dcfg.port = httpserverport;
+           dcfg.closeoneof = closeoneof;
            dcfg.basicauthdata = httpauthdata;
-           pcfg.format = NULL;
-           pcfg.rootpage = NULL;
+           pcfg.uriformat = "/%|/%p/%|%|/%p";
            pcfg.autoage = htmlautoagerange;
            pcfg.oldest = htmloldest;
            pcfg.newest = htmlnewest;
            pcfg.showfiles = showfiles;
+            pcfg.html_title = html_title;
            run_httpd(mappedfile, auth, &dcfg, &pcfg);
            munmap(mappedfile, totalsize);
        } else if (mode == REMOVE) {