From: Ian Jackson Date: Tue, 17 May 2016 17:05:37 +0000 (+0100) Subject: Merge branch 'mdw+fixes' X-Git-Url: https://git.distorted.org.uk/~mdw/userv-utils/commitdiff_plain/0324503152d6edb67572b45225745441913ae55e?hp=f34db0d4628fd5a077ac48a627b4491c5d933d62 Merge branch 'mdw+fixes' --- diff --git a/www-cgi/README.custom-env-filter b/www-cgi/README.custom-env-filter new file mode 100644 index 0000000..58ef39b --- /dev/null +++ b/www-cgi/README.custom-env-filter @@ -0,0 +1,27 @@ +Allow customization of the environment filters. + +Sites can configure ucgi's environment filters, and end users can +configure ucgitarget's filters. + +By default, ucgi will look in /etc/userv/ucgi.env-filter, but if +UCGI_ENV_FILTER is set in its environment, it will look there +instead. The filter may contain wildcards and so on. + +By default, ucgitarget looks in .userv/ucgitarget.env-filter, or +/etc/userv/ucgitarget.env-filter, if the former doesn't exist; but if +passed a `-e FILTER' option on its command line, it will look in the +file FILTER instead. This filter may /not/ contain wildcards. + +In both cases, if an explicitly named filter file can't be found then +the program fails; if the default filter files can't be found then they +fall back to built-in lists. + +The reason for the asymmetry in interfaces is: it's hard to pass +command-line options to CGI scripts from webservers, but pretty easy to +set environment variables; whereas it's hard to pass environment +variables to a service program in a Userv configuration file, but easy +to pass command-line arguments. + + +The `?DEFAULTS' pattern can be specified to match the default set +(which is different in `ucgi' and `ucgitarget'). 
diff --git a/www-cgi/ucgi.c b/www-cgi/ucgi.c index 8d6fa0d..9433020 100644 --- a/www-cgi/ucgi.c +++ b/www-cgi/ucgi.c @@ -31,14 +31,58 @@ #include "ucgi.h" +static const char *const default_envok[] = { + "AUTH_TYPE", + "CONTENT_TYPE", + "CONTENT_LENGTH", + "DOCUMENT_ROOT", + "GATEWAY_INTERFACE", + "HTTP_*", + "HTTPS", + "PATH_INFO", + "PATH_TRANSLATED", + "QUERY_STRING", + "REDIRECT_*", + "REMOTE_*", + "REQUEST_METHOD", + "REQUEST_URI", + "SCRIPT_*", + "SERVER_*", + "SSL_*", + 0 +}; + +struct buildargs { + const char **v; + int n, max; +}; + +static void addarg(struct buildargs *args, const char *a) { /* append a to args->v, or fail with a 500 error when the vector is full */ + if (args->n >= args->max) error("too many arguments", 500); /* v has exactly max slots, so index max is already out of bounds */ + args->v[args->n++]= a; +} + +static void add_userv_var(const char *fulln, + const char *en, const char *ev, void *p) { + struct buildargs *args= p; + size_t l; + char *a; + + l= strlen(ev); + if (l > MAX_ENVVAR_VALUE) error("environment variable too long", 500); + a= xmalloc(strlen(en)+l+6); + sprintf(a,"-DE_%s=%s",en,ev); + addarg(args, a); +} + int main(int argc, const char **argv) { - char *defarg, *username; - const char *slash2, *pathi, *ev, *en, *av; - const char *const *ep; - const char **arguments; + char *username; + const char *slash2, *pathi, *ev, *av; + const char *const *envok = 0; size_t usernamelen, l; + struct buildargs args; pid_t child, rchild; - int nargs, status; + int status; l= strlen(argv[0]); if (l>6 && !strcmp(argv[0]+l-6,"-debug")) debugmode= 1; @@ -47,41 +91,54 @@ int main(int argc, const char **argv) { if (fputs("Content-Type: text/plain\n\n",stdout)==EOF || fflush(stdout)) syserror("write stdout"); if (dup2(1,2)<0) { perror("dup stdout to stderr"); exit(-1); } + D( printf(";;; UCGI\n"); ) } - if (argc > MAX_ARGS) error("too many arguments"); + if (argc > MAX_ARGS) error("too many arguments", 500); + + ev= getenv("UCGI_ENV_FILTER"); + if (ev) + envok= load_filters(LOADF_MUST, ev, LF_END); + else + envok= load_filters(0, "/etc/userv/ucgi.env-filter", LF_END); pathi= 
getenv("PATH_INFO"); - if (!pathi) error("PATH_INFO not found"); - if (pathi[0] != '/' || pathi[1] != '~') error("PATH_INFO must start with /~"); - slash2= strchr(pathi+2,'/'); if (!slash2) error("PATH_INFO must have more than one /"); + if (!pathi) error("PATH_INFO not found", 500); + D( if (debugmode) { + printf(";; find user name...\n" + ";; initial PATH_INFO = `%s'\n", + pathi); + } ) + if (pathi[0] != '/' || pathi[1] != '~') + error("PATH_INFO must start with /~", 400); + slash2= strchr(pathi+2,'/'); + if (!slash2) error("PATH_INFO must have more than one /", 400); usernamelen= slash2-(pathi+2); - if (usernamelen > MAX_USERNAME_LEN) error("PATH_INFO username too long"); + if (usernamelen > MAX_USERNAME_LEN) error("PATH_INFO username too long", 400); username= xmalloc(usernamelen+1); memcpy(username,pathi+2,usernamelen); username[usernamelen]= 0; - if (!isalpha(username[0])) error("username 1st character is not alphabetic"); + D( if (debugmode) + printf(";; user = `%s'; tail = `%s'\n", username, slash2); ) + if (!isalpha(username[0])) + error("username 1st character is not alphabetic", 400); xsetenv("PATH_INFO",slash2,1); + + args.n= 0; args.max= argc + MAX_ENVVARS + 10; + args.v= xmalloc(args.max * sizeof(*args.v)); - arguments= xmalloc(sizeof(const char*)*(nenvok+argc+10)); - nargs= 0; - - arguments[nargs++]= "userv"; - if (debugmode) arguments[nargs++]= "-DDEBUG=1"; - - for (ep= envok; (en= *ep); ep++) { - ev= getenv(en); if (!ev) continue; - l= strlen(ev); if (l > MAX_ENVVAR_VALUE) error("environment variable too long"); - defarg= xmalloc(strlen(en)+l+6); - sprintf(defarg,"-DE_%s=%s",en,ev); - arguments[nargs++]= defarg; - } + addarg(&args, "userv"); + if (debugmode) addarg(&args, "-DDEBUG=1"); - arguments[nargs++]= username; - arguments[nargs++]= "www-cgi"; - while ((av= (*++argv))) arguments[nargs++]= av; - arguments[nargs++]= 0; + filter_environment(FILTF_WILDCARD, "", envok, default_envok, + add_userv_var, &args); + + addarg(&args, username); + 
addarg(&args, "www-cgi"); + while ((av= (*++argv))) addarg(&args, av); + addarg(&args, 0); if (debugmode) { + D( fflush(stdout); ) child= fork(); if (child==-1) syserror("fork"); if (child) { rchild= waitpid(child,&status,0); @@ -91,7 +148,16 @@ int main(int argc, const char **argv) { } } - execvp("userv",(char*const*)arguments); + D( if (debugmode) { + int i; + + printf(";; final command line...\n"); + for (i = 0; args.v[i]; i++) + printf(";; %s\n", args.v[i]); + fflush(stdout); + } ) + + execvp("userv",(char*const*)args.v); syserror("exec userv"); return -1; } diff --git a/www-cgi/ucgi.h b/www-cgi/ucgi.h index ed225fd..dd90b23 100644 --- a/www-cgi/ucgi.h +++ b/www-cgi/ucgi.h @@ -24,18 +24,37 @@ #include +#ifdef DEBUG +# define D(x) x +#else +# define D(x) +#endif + #define MAX_ARGS 1024 #define MAX_USERNAME_LEN 1024 #define MAX_SCRIPTPATH_LEN 1024 +#define MAX_ENVVAR_NAME 128 #define MAX_ENVVAR_VALUE (1024*1024) +#define MAX_ENVVARS 256 void syserror(const char *m); -void error(const char *m); +void error(const char *m, int st); void *xmalloc(size_t sz); void xsetenv(const char *en, const char *ev, int overwrite); +void *xrealloc(void *ptr, size_t sz); + +const char **load_filters(unsigned flags, const char *first, ...); +#define LOADF_MUST 1u +#define LF_END ((const char *)0) + +void filter_environment(unsigned flags, const char *prefix_in, + const char *const *patv, + const char *const *defaults, + void (*foundone)(const char *fulln, const char *en, + const char *ev, void *p), + void *p); +#define FILTF_WILDCARD 1u -extern const char *const envok[]; -extern const int nenvok; extern int debugmode; #endif diff --git a/www-cgi/ucgicommon.c b/www-cgi/ucgicommon.c index 090cc03..4d37187 100644 --- a/www-cgi/ucgicommon.c +++ b/www-cgi/ucgicommon.c @@ -19,59 +19,16 @@ * along with userv-utils; if not, see http://www.gnu.org/licenses/. 
*/ +#include +#include +#include #include #include #include -#include "ucgi.h" +#include -const char *const envok[]= { - "AUTH_TYPE", - "CONTENT_LENGTH", - "CONTENT_TYPE", - "DOCUMENT_ROOT", - "GATEWAY_INTERFACE", - "HTTP_ACCEPT", - "HTTP_ACCEPT_CHARSET", - "HTTP_ACCEPT_ENCODING", - "HTTP_ACCEPT_LANGUAGE", - "HTTP_CACHE_CONTROL", - "HTTP_CONNECTION", - "HTTP_CONTENT_ENCODING", - "HTTP_COOKIE", - "HTTP_DNT", - "HTTP_HOST", - "HTTP_KEEP_ALIVE", - "HTTP_NEGOTIATE", - "HTTP_PRAGMA", - "HTTP_REFERER", - "HTTP_USER_AGENT", - "HTTP_VIA", - "HTTP_X_FORWARDED_FOR", - "HTTPS", - "PATH_INFO", - "PATH_TRANSLATED", - "QUERY_STRING", - "REMOTE_ADDR", - "REMOTE_HOST", - "REMOTE_USER", - "REMOTE_IDENT", - "REQUEST_METHOD", - "REQUEST_URI", - "SCRIPT_FILENAME", - "SCRIPT_NAME", - "SCRIPT_URI", - "SCRIPT_URL", - "SERVER_ADDR", - "SERVER_ADMIN", - "SERVER_NAME", - "SERVER_PORT", - "SERVER_PROTOCOL", - "SERVER_SIGNATURE", - "SERVER_SOFTWARE", - 0 -}; -const int nenvok= sizeof(envok)/sizeof(envok[0]); +#include "ucgi.h" int debugmode= 0; @@ -81,18 +38,20 @@ static void outerror(void) { } void syserror(const char *m) { - if (printf("Content-Type: text/plain\n\n" + if (printf("Content-Type: text/plain\n" + "Status: 500\n\n" "ucgi: system call error:\n" "%s: %s\n", m,strerror(errno))==EOF || fflush(stdout)) outerror(); exit(0); } -void error(const char *m) { - if (printf("Content-Type: text/plain\n\n" +void error(const char *m, int st) { + if (printf("Content-Type: text/plain\n" + "Status: %d\n\n" "ucgi: error:\n" "%s\n", - m)==EOF || fflush(stdout)) outerror(); + st, m)==EOF || fflush(stdout)) outerror(); exit(0); } @@ -104,6 +63,158 @@ void *xmalloc(size_t sz) { return r; } +void *xrealloc(void *ptr, size_t sz) { + void *r; + + r= realloc(ptr,sz); + if (!r) syserror("realloc failed"); + return r; +} + void xsetenv(const char *en, const char *ev, int overwrite) { if (setenv(en,ev,overwrite)) syserror("setenv"); } + +const char **load_filters(unsigned flags, const char *first, ...) 
{ + va_list ap; + const char *name, *p, *q, **v; + char *pp; + size_t l, n, sz; + FILE *fp; + char buf[MAX_ENVVAR_NAME]; + + D( if (debugmode) printf(";; load_filters...\n"); ) + va_start(ap, first); + for (name= first; name; name= va_arg(ap, const char *)) { + fp= fopen(name, "r"); if (fp) goto opened; + D( if (debugmode) + printf(";; file `%s': %s\n", name, strerror(errno)); ) + if (errno != ENOENT) syserror("failed to open environment filters"); + } + va_end(ap); + if (flags & LOADF_MUST) syserror("failed to open environment filters"); + D( if (debugmode) printf(";; using default filters\n"); ) + return 0; + +opened: + va_end(ap); + D( if (debugmode) printf(";; file `%s': OK\n", name); ) + + n= 0; sz= 128; v= xmalloc(sz * sizeof(*v)); + for (;;) { + if (!fgets(buf, sizeof(buf), fp)) break; + l= strlen(buf); + if (l && buf[l - 1] == '\n') buf[--l]= 0; /* l is 0 when the line begins with a NUL byte; don't read buf[-1] */ + if (l + 1 == sizeof(buf)) + error("line too long in environment filter file", 500); + p= buf; q= p + l; + while (isspace((unsigned char)*p)) p++; + while (q > p && isspace((unsigned char)q[-1])) q--; + if (*p == '#' || p == q) continue; + l= q - p; + pp= xmalloc(l + 1); + memcpy(pp, p, l); + pp[l]= 0; + v[n++]= pp; + D( if (debugmode) printf(";; filter: `%s'\n", pp); ) + if (n >= sz) { + sz *= 2; + v= xrealloc(v, sz * sizeof(*v)); + } + } + if (ferror(fp)) syserror("failed to read environment filters"); + fclose(fp); v[n]= 0; /* null-terminate: envvar_match() walks the list until a null entry; n < sz holds here, so the slot exists */ + return v; +} + +static int envvar_match(unsigned flags, const char *en, + const char *const *patv, + const char *const *defaults, + const char **ev) { + const char *const *patp; + const char *q, *pat; + int acceptp; + int rc; + + if (!patv) { patv= defaults; defaults= 0; } + for (patp= patv; (pat= *patp); patp++) { + q= en; + acceptp= 1; + if (*pat == '!' 
&& (flags & FILTF_WILDCARD)) { acceptp= 0; pat++; } + else if (*pat == '?') { + if (strcmp(pat + 1, "DEFAULTS") == 0) { + assert(defaults); + rc= envvar_match(flags, en, defaults, 0, ev); + if (rc) return rc; + } else + error("unknown pattern directive", 500); + continue; + } + + for (;;) { + if (!*pat) { + if (*q != '=') { + D( if (debugmode) + printf(";; mismatch `%s' (prefix)\n", *patp); ) + break; + } + D( if (debugmode) printf(";; matched pattern `%s'\n", *patp); ) + goto match; + } else if (*pat == '*' && (flags & FILTF_WILDCARD)) { + q = strchr(q, '='); + if (!q) { + D( if (debugmode) + printf(";; mismatch `%s' (discard: no `=')\n", *patp); ) + return -1; + } + D( if (debugmode) + printf(";; wildcard match for `%s'\n", *patp); ) + goto match; + } else { + if (*pat++ != *q++) { + D( if (debugmode) printf(";; mismatch `%s'\n", *patp); ) + break; + } + } + } + } + return 0; + +match: + if (!acceptp) return -1; + *ev= q + 1; + return +1; +} + +void filter_environment(unsigned flags, const char *prefix_in, + const char *const *patv, + const char *const *defaults, + void (*foundone)(const char *fulln, + const char *en, const char *ev, + void *p), + void *p) { + char *const *ep; + const char *en, *ev; + char enbuf[MAX_ENVVAR_NAME]; + size_t n, pn = strlen(prefix_in); + + D( if (debugmode) printf(";; filter_environment...\n"); ) + for (ep= environ; (en= *ep); ep++) { + D( if (debugmode) printf(";; consider env-var `%s'\n", en); ) + if (strncmp(en, prefix_in, pn) != 0 || !en[pn]) { + D( if (debugmode) printf(";; doesn't match prefix\n"); ) + continue; + } + if (envvar_match(flags, en + pn, patv, defaults, &ev) > 0) { + n= strcspn(en, "="); + if (n >= sizeof(enbuf)) + error("environment variable name too long", 500); + memcpy(enbuf, en, n); + enbuf[n]= 0; + D( if (debugmode) + printf(";; full = `%s'; tail = `%s'; value = `%s'\n", + enbuf, enbuf + pn, ev); ) + foundone(enbuf, enbuf + pn, ev, p); + } + } +} diff --git a/www-cgi/ucgitarget.c b/www-cgi/ucgitarget.c index 
7a4e8bc..a24867d 100644 --- a/www-cgi/ucgitarget.c +++ b/www-cgi/ucgitarget.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -33,71 +34,144 @@ #include "ucgi.h" -static void *xrealloc(void *ptr, size_t sz) { - void *r; +static const char *const default_envok[]= { + "AUTH_TYPE", + "CONTENT_LENGTH", + "CONTENT_TYPE", + "DOCUMENT_ROOT", + "GATEWAY_INTERFACE", + "HTTP_ACCEPT", + "HTTP_ACCEPT_CHARSET", + "HTTP_ACCEPT_ENCODING", + "HTTP_ACCEPT_LANGUAGE", + "HTTP_CACHE_CONTROL", + "HTTP_CONNECTION", + "HTTP_CONTENT_ENCODING", + "HTTP_COOKIE", + "HTTP_DNT", + "HTTP_HOST", + "HTTP_KEEP_ALIVE", + "HTTP_NEGOTIATE", + "HTTP_PRAGMA", + "HTTP_REFERER", + "HTTP_USER_AGENT", + "HTTP_VIA", + "HTTP_X_FORWARDED_FOR", + "HTTPS", + "PATH_INFO", + "PATH_TRANSLATED", + "QUERY_STRING", + "REDIRECT_HANDLER", + "REDIRECT_SCRIPT_URI", + "REDIRECT_SCRIPT_URL", + "REDIRECT_STATUS", + "REDIRECT_URL", + "REMOTE_ADDR", + "REMOTE_HOST", + "REMOTE_USER", + "REMOTE_IDENT", + "REQUEST_METHOD", + "REQUEST_URI", + "SCRIPT_FILENAME", + "SCRIPT_NAME", + "SCRIPT_URI", + "SCRIPT_URL", + "SERVER_ADDR", + "SERVER_ADMIN", + "SERVER_NAME", + "SERVER_PORT", + "SERVER_PROTOCOL", + "SERVER_SIGNATURE", + "SERVER_SOFTWARE", + "SSL_CIPHER", + "SSL_CLIENT_S_DN", + "SSL_CLIENT_VERIFY", + "SSL_PROTOCOL", + 0 +}; - r= realloc(ptr,sz); - if (!r) syserror("realloc failed"); - return r; +static void setenvar(const char *fulln, + const char *en, const char *ep, void *p) { + xsetenv(en, ep, 1); + unsetenv(fulln); } -int main(int argc, const char **argv) { - char *uservarn, *scriptpath, *newvar; - const char *nextslash, *lastslash, *pathi, *ev, *ev2, *en, *scriptdir, *av; - const char *const *ep; +int main(int argc, char **argv) { + char *scriptpath, *newvar; + const char *nextslash, *lastslash, *pathi, *ev, *ev2, *scriptdir, *av; + const char *const *envok; const char **arguments; - size_t scriptdirlen, scriptpathlen, l, uservarnl; + size_t scriptdirlen, scriptpathlen, l; struct stat 
stab; - int r, nargs; + int i, r, nargs; + const char *filters= 0; ev= getenv("USERV_U_DEBUG"); if (ev && *ev) debugmode= 1; - if (argc > MAX_ARGS) error("too many arguments"); + D( if (debugmode) printf(";;; UCGITARGET\n"); ) + if (argc > MAX_ARGS) error("too many arguments", 500); - if (!*++argv) error("no script directory argument"); - ev= getenv("HOME"); if (!ev) error("no HOME env. var"); + for (;;) { + i= getopt(argc, argv, "+e:"); if (i < 0) break; + switch (i) { + case 'e': filters= optarg; break; + default: error("bad command line", 500); break; + } + } + argc -= optind; argv += optind; + + if (!*argv) error("no script directory argument", 500); + ev= getenv("HOME"); if (!ev) error("no HOME env. var", 500); l= strlen(*argv)+strlen(ev); newvar= xmalloc(l+2); sprintf(newvar,"%s/%s",ev,*argv); scriptdir= newvar; scriptdirlen= strlen(scriptdir); - uservarn= 0; - uservarnl= 0; - for (ep= envok; (en= *ep); ep++) { - l= strlen(en)+11; - if (uservarnl MAX_ENVVAR_VALUE) error("environment variable too long"); - if (setenv(en,ev,1)) syserror("setenv"); - unsetenv(uservarn); + if (filters) + envok= load_filters(LOADF_MUST, filters, LF_END); + else { + envok= load_filters(0, + ".userv/ucgitarget.env-filter", + "/etc/userv/ucgitarget.env-filter", + LF_END); } + filter_environment(0, "USERV_U_E_", envok, default_envok, setenvar, 0); + scriptpath= 0; pathi= getenv("PATH_INFO"); - if (!pathi) error("PATH_INFO not found"); + if (!pathi) error("PATH_INFO not found", 500); lastslash= pathi; + D( if (debugmode) { + printf(";; find script name...\n" + ";; PATH_INFO = `%s'\n", + pathi); + } ) for (;;) { - if (*lastslash != '/') error("PATH_INFO expected slash not found"); - if (lastslash[1]=='.' || lastslash[1]=='#' || !lastslash[1]) error("bad char begin"); + if (*lastslash != '/') error("PATH_INFO expected slash not found", 400); + if (lastslash[1]=='.' 
|| lastslash[1]=='#' || !lastslash[1]) + error("bad char begin", 400); nextslash= strchr(lastslash+1,'/'); if (!nextslash) nextslash= lastslash+1+strlen(lastslash+1); - if (!nextslash) error("insufficient elements in PATH_INFO"); - if (nextslash==lastslash+1) error("empty component in PATH_INFO"); - if (nextslash-pathi > MAX_SCRIPTPATH_LEN) error("PATH_INFO script path too long"); + if (!nextslash) error("insufficient elements in PATH_INFO", 400); + if (nextslash==lastslash+1) error("empty component in PATH_INFO", 400); + if (nextslash-pathi > MAX_SCRIPTPATH_LEN) + error("PATH_INFO script path too long", 400); scriptpathlen= scriptdirlen+(nextslash-pathi); scriptpath= xrealloc(scriptpath,scriptpathlen+1); strcpy(scriptpath,scriptdir); memcpy(scriptpath+scriptdirlen,pathi,nextslash-pathi); scriptpath[scriptpathlen]= 0; - if (scriptpath[scriptpathlen-1]=='~') error("bad char end"); + if (scriptpath[scriptpathlen-1]=='~') error("bad char end", 400); + D( if (debugmode) printf(";; try `%s'\n", scriptpath); ) r= stat(scriptpath,&stab); if (r) syserror("stat script"); if (S_ISREG(stab.st_mode)) break; - if (!S_ISDIR(stab.st_mode)) syserror("script not directory or file"); + if (!S_ISDIR(stab.st_mode)) error("script not directory or file", 500); lastslash= nextslash; } + D( if (debugmode) printf(";; found script: tail = `%s'\n", nextslash); ) if (*nextslash) xsetenv("PATH_INFO",nextslash,1); else unsetenv("PATH_INFO"); @@ -109,7 +183,7 @@ int main(int argc, const char **argv) { ev= getenv("SCRIPT_NAME"); if (ev) { - ev2= getenv("USER"); if (!ev2) error("no USER variable"); + ev2= getenv("USER"); if (!ev2) error("no USER variable", 500); newvar= xmalloc(strlen(ev)+2+strlen(ev2)+scriptpathlen-scriptdirlen+2); sprintf(newvar,"%s/~%s%s",ev,ev2,scriptpath+scriptdirlen); xsetenv("SCRIPT_NAME",newvar,1); @@ -122,6 +196,19 @@ int main(int argc, const char **argv) { while ((av= (*++argv))) arguments[nargs++]= av; arguments[nargs++]= 0; + D( if (debugmode) { + int i; + + printf(";; 
final environment...\n"); + for (i = 0; environ[i]; i++) + printf(";; %s\n", environ[i]); + + printf(";; final command line...\n"); + for (i = 0; arguments[i]; i++) + printf(";; %s\n", arguments[i]); + fflush(stdout); + } ) + execvp(scriptpath,(char*const*)arguments); syserror("exec script"); return -1;