cgi.py: Set the default static URL prefix from user's `SCRIPT_NAME'.
[chopwood] / cgi.py
1 ### -*-python-*-
2 ###
3 ### CGI machinery
4 ###
5 ### (c) 2013 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This file is part of Chopwood: a password-changing service.
11 ###
12 ### Chopwood is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU Affero General Public License as
14 ### published by the Free Software Foundation; either version 3 of the
15 ### License, or (at your option) any later version.
16 ###
17 ### Chopwood is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU Affero General Public License for more details.
21 ###
22 ### You should have received a copy of the GNU Affero General Public
23 ### License along with Chopwood; if not, see
24 ### <http://www.gnu.org/licenses/>.
25
26 from __future__ import with_statement
27
28 import contextlib as CTX
29 import os as OS; ENV = OS.environ
30 import re as RX
31 import sys as SYS
32 import time as T
33 import traceback as TB
34
35 from auto import HOME, PACKAGE, VERSION
36 import config as CONF; CFG = CONF.CFG
37 import format as F
38 import output as O; OUT = O.OUT; PRINT = O.PRINT
39 import subcommand as SC
40 import util as U
41
42 ###--------------------------------------------------------------------------
43 ### Configuration tweaks.
44
## Our own URL path, as found in the `SCRIPT_NAME' environment variable; fall
## back to a conventional location if the environment doesn't provide one.
try: _script_name = ENV['SCRIPT_NAME']
except KeyError: _script_name = '/cgi-bin/chpwd'

CONF.DEFAULTS.update(

  ## The URL of this program, when it's run through CGI.
  SCRIPT_NAME = _script_name,

  ## A (maybe relative) URL for static content.  By default this comes from
  ## the main script, but we hope that user agents cache it.  `None' is a
  ## placeholder: the `set_static' hook fills in the real default.
  STATIC = None)
55
@CONF.hook
def set_static():
  """
  Configuration hook: choose a default for `CFG.STATIC' if none was given.

  An explicitly configured value is left alone; otherwise static content is
  served from `static' beneath `CFG.SCRIPT_NAME'.
  """
  if CFG.STATIC is not None: return
  CFG.STATIC = CFG.SCRIPT_NAME + '/static'
59
60 ###--------------------------------------------------------------------------
61 ### Escaping and encoding.
62
## Some handy regular expressions.

## A `%XX' escape in a form-url-encoded string; group 1 is the hex digits.
R_URLESC = RX.compile(r'%([0-9a-fA-F]{2})')

## Any single character which must be escaped when form-url-encoding.
R_URLBAD = RX.compile(r'[^-\w,.!]')

## Any single character which HTML might misinterpret.
R_HTMLBAD = RX.compile('[&<>\'"]')
67
def urldecode(s):
  """
  Decode a single form-url-encoded string S.

  Each `+' becomes a space, and then each `%XX' escape is replaced by the
  character whose code is the hexadecimal number XX.  The `+' translation
  is done first, so that a plus sign written as `%2b' survives decoding.
  Returns the decoded string.
  """
  spaced = s.replace('+', ' ')
  return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)), spaced)
73
def urlencode(s):
  """
  Encode a single string S using form-url-encoding.

  Every character matched by `R_URLBAD' is replaced by a `%xx' escape
  giving its character code in (lower-case) hex; everything else is left
  alone.  Returns the encoded string.
  """
  def escape(m): return '%%%02x' % ord(m.group(0))
  return R_URLBAD.sub(escape, s)
77
def htmlescape(s):
  """
  Escape a literal string S so that HTML doesn't misinterpret it.

  Every character matched by `R_HTMLBAD' is replaced by a hexadecimal
  numeric character reference `&#xNN;'.  Returns the escaped string.
  """
  def escape(m): return '&#x%02x;' % ord(m.group(0))
  return R_HTMLBAD.sub(escape, s)
81
## Some standard character sequences, and HTML entity names for prettier
## versions.
html_quotify = U.StringSubst({

  ## Characters which are special to HTML itself.
  "<": '&lt;',
  ">": '&gt;',
  "&": '&amp;',

  ## Single and double quotes.
  "`": '&lsquo;',
  "'": '&rsquo;',
  '"': '&quot;',

  ## Multi-character sequences for typographic quotes and dashes.
  "``": '&ldquo;',
  "''": '&rdquo;',
  "--": '&ndash;',
  "---": '&mdash;'
})
96
97 ###--------------------------------------------------------------------------
98 ### Output machinery.
99
class HTTPOutput (O.FileOutput):
  """
  Output driver providing an automatic HTTP header.

  Attributes:

  `headerp'
        True if we've written a header.  Call `header' before writing
        anything to choose a custom header; otherwise the first `write'
        emits a `text/plain' one.

  `warnings'
        The list of messages reported through `warn', in order.
  """

  def __init__(me, *args, **kw):
    """Constructor: pass arguments to the base class; initialize state."""
    super(HTTPOutput, me).__init__(*args, **kw)
    me.warnings = []
    me.headerp = False

  def write(me, msg):
    """Output protocol: print a header if we've not written one already."""
    if not me.headerp:
      me.header('text/plain')
    super(HTTPOutput, me).write(msg)

  def header(me, content_type = 'text/plain', **kw):
    """
    Print a header, if none has yet been printed.

    Keyword arguments can be passed to emit HTTP headers: see `http_headers'
    for the formatting rules.  The header lines are followed by a blank
    line.  If the request method is `HEAD' then `HEADER_DONE' is called once
    the header has been written.
    """
    if not me.headerp:

      ## Set the flag before writing anything: presumably `writeln' comes
      ## back through `write', which mustn't start another header.
      me.headerp = True
      for line in O.http_headers(content_type = content_type, **kw):
        me.writeln(line)
      me.writeln('')
      if METHOD == 'HEAD':
        HEADER_DONE()

  def warn(me, msg):
    """
    Report a warning message.

    The warning MSG is stashed in a list where it can be retrieved using
    `warnings'.
    """
    me.warnings.append(msg)
142
def cookie(name, value, **kw):
  """
  Return a HTTP `Set-Cookie' header.

  The NAME and VALUE give the name and value of the cookie; both are
  form-url-encoded to prevent misinterpretation (fortunately, `cgiparse'
  knows to undo this transformation).  The KW are other attributes to
  declare: the names are forced to lower-case and underscores `_' are
  replaced by hyphens `-'.  An attribute whose value is `True' is taken to
  be boolean, and is written as its bare name; an attribute with any other
  false value is left out entirely.

  If a `max-age' attribute is given, then an `expires' attribute is
  computed from it, relative to `U.NOW', replacing any `expires' which was
  supplied.
  """

  ## Convert the attribute names to their on-the-wire spelling.
  attr = {}
  for key, val in kw.iteritems():
    attr[key.lower().replace('_', '-')] = val

  ## Provide an absolute expiry time to go with a relative one.
  if 'max-age' in attr:
    maxage = int(attr['max-age'])
    attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                 T.gmtime(U.NOW + maxage))

  ## Put the pieces together.
  parts = ['%s=%s' % (urlencode(name), urlencode(value))]
  for key, val in attr.iteritems():
    if not val: continue
    if val is True: parts.append(key)
    else: parts.append('%s=%s' % (key, val))
  return '; '.join(parts)
166
def action(*v, **kw):
  """
  Build a URL invoking this script.

  The positional arguments V are used to construct a path which is appended
  to the (deduced or configured) script name (and presumably will be read
  back as `PATH_INFO').  The keyword arguments are (form-url-encoded and)
  appended as a query string, if present, in order of parameter name and
  separated by `;'.

  The result is HTML-escaped.
  """
  path = [CFG.SCRIPT_NAME]
  path.extend(v)
  url = '/'.join(path)
  if kw:
    query = ['%s=%s' % (urlencode(k), urlencode(kw[k])) for k in sorted(kw)]
    url = url + '?' + ';'.join(query)
  return htmlescape(url)
181
def static(name):
  """
  Build a URL for the static file NAME.

  The URL is NAME beneath the configured `CFG.STATIC' prefix; the result is
  HTML-escaped.
  """
  url = CFG.STATIC + '/' + name
  return htmlescape(url)
185
def redirect(where, **kw):
  """
  Write a complete redirection to some other URL.

  A header is written with status 302 and WHERE as its `location'; the KW
  are passed on to `OUT.header' as further header arguments.  This is
  followed by a small HTML page containing a link to WHERE (HTML-escaped),
  for the benefit of user agents which don't follow the redirection
  themselves.
  """
  OUT.header(content_type = 'text/html',
             status = 302, location = where,
             **kw)

  ## The body element is closed properly here: it used to end with a second
  ## opening `<body>' tag.
  PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</body>
</html>""" % htmlescape(where))
198
199 ###--------------------------------------------------------------------------
200 ### Templates.
201
## Where we find our templates.
TMPLDIR = HOME

## Keyword arguments for templates.  The dictionary lives in a fluid so that
## `tmplkw' can rebind it temporarily.
STATE = U.Fluid()
STATE.kw = dict()
208
## Set some basic keyword arguments.
@CONF.hook
def set_template_keywords():
  """
  Configuration hook: add the basic keyword arguments to `STATE.kw'.

  These are the package name and version, and the configured script URL,
  static-content URL, and `ALLOWOP' setting.
  """
  basics = dict(package = PACKAGE,
                version = VERSION,
                script = CFG.SCRIPT_NAME,
                static = CFG.STATIC,
                allowop = CFG.ALLOWOP)
  STATE.kw.update(basics)
218
class TemplateFinder (object):
  """
  A magical fake dictionary whose keys are templates.

  Looking up a key reads the file of that name from the finder's directory
  and returns its contents.  The contents are cached, so each file is read
  at most once by any one finder.
  """
  def __init__(me, dir):
    """Initialize a finder which reads templates from the directory DIR."""
    me._dir = dir
    me._cache = {}
  def __getitem__(me, key):
    """Return the contents of the template file KEY, reading it if needed."""
    if key not in me._cache:
      path = OS.path.join(me._dir, key)
      with open(path) as fh: me._cache[key] = fh.read()
    return me._cache[key]
## The template finder, both for our own use and, as `TMPL', for templates.
TMPL = TemplateFinder(TMPLDIR)
STATE.kw['TMPL'] = TMPL
233
@CTX.contextmanager
def tmplkw(**kw):
  """
  Context manager: execute the body with additional keyword arguments

  The KW are layered over the current `STATE.kw' in a fresh dictionary,
  which is bound as `STATE.kw' (using `STATE.bind') around the body.  The
  existing dictionary is not modified.
  """
  merged = dict(STATE.kw, **kw)
  with STATE.bind(kw = merged): yield
243
## Our own formatting operations, indexed by directive character.  This map
## is listed ahead of `F.BASEOPS' in the `opmaps' bound by `format_tmpl'.
FORMATOPS = dict()
245
class FormatHTML (F.SimpleFormatOperation):
  """
  ~H: escape output suitable for inclusion in HTML.

  With `:', apply quotification (`html_quotify') rather than plain escaping
  (`htmlescape').
  """
  def _convert(me, arg):
    """Return ARG converted according to whether `:' was given."""
    if me.colonp: conv = html_quotify
    else: conv = htmlescape
    return conv(arg)
FORMATOPS['H'] = FormatHTML
256
class FormatWrap (F.BaseFormatOperation):
  """
  ~<...~@>: wrap enclosed material in another formatting control string.

  The argument is a formatting control.  The enclosed material is split into
  pieces separated by `~;' markers.  The formatting control is performed, and
  passed the list of pieces (as compiled formatting operations) in the
  keyword argument `wrapped'.
  """
  def __init__(me, *args):
    """Constructor: collect the enclosed pieces, up to the closing `~>'."""
    super(FormatWrap, me).__init__(*args)
    collected = []
    done = False
    while not done:
      piece, delim = F.collect_subformat('>;')
      collected.append(piece)
      done = delim.char == '>'
    me.pieces = collected
  def _format(me, atp, colonp):
    """Compile the argument as a control string and perform it."""
    control = me.getarg.get()
    op = F.compile(control)

    ## Extend a copy of the current argument map rather than modifying it.
    argmap = dict(F.FORMAT.argmap, wrapped = me.pieces)
    with F.FORMAT.bind(argmap = argmap):
      op.format()
FORMATOPS['<'] = FormatWrap
279
def format_tmpl(control, **kw):
  """
  Format the template CONTROL string, writing the result to `OUT'.

  Our own `FORMATOPS' are made available in addition to the `F.BASEOPS'
  while the control string is processed.  The template keyword arguments
  are the current `STATE.kw', augmented by KW (see `tmplkw').
  """
  opmaps = [FORMATOPS, F.BASEOPS]
  with F.COMPILE.bind(opmaps = opmaps):
    with tmplkw(**kw):
      args = STATE.kw
      F.format(OUT, control, **args)
284
def page(template, header = None, title = 'Chopwood', **kw):
  """
  Write a complete HTML page whose payload is the template named TEMPLATE.

  HEADER, if given, is a dictionary of keyword arguments for `OUT.header';
  it is not modified, and the content type is always forced to `text/html'.
  The page is made by formatting the `wrapper.fhtml' template, passing it
  the TITLE, the accumulated `OUT.warnings', and the text of TEMPLATE as
  `payload'.  The KW are passed along as further template keyword
  arguments.
  """
  ## The default is `None' rather than a shared mutable dictionary; either
  ## way, work on a private copy.
  hdr = dict(header or {}, content_type = 'text/html')
  OUT.header(**hdr)
  format_tmpl(TMPL['wrapper.fhtml'],
              title = title, warnings = OUT.warnings,
              payload = TMPL[template], **kw)
291
292 ###--------------------------------------------------------------------------
293 ### Error reporting.
294
@CTX.contextmanager
def cgi_errors(hook = None):
  """
  Context manager: report errors in the body as useful HTML.

  If HOOK is given, then call it before reporting errors.  It may have set up
  useful stuff.

  An `ExpectedError' raised before any header has been written is reported
  using the `error.fhtml' template, with the error's own status code.
  Anything else is reported using `exception.fhtml', together with the
  traceback, query parameters, cookies, path and environment: as a complete
  page with status 500 if no header has been written yet, or otherwise
  following whatever output has already been produced.  The exception is not
  propagated further.
  """
  try:
    yield None
  except Exception:
    e = SYS.exc_info()[1]
    if hook: hook()

    ## The easy case: an anticipated error, and we can still choose the
    ## response status.
    if isinstance(e, U.ExpectedError) and not OUT.headerp:
      page('error.fhtml',
           header = dict(status = e.code),
           title = 'Chopwood: error', error = e)
      return

    ## Otherwise, report everything which might help with debugging.
    exty, exval, extb = SYS.exc_info()
    details = dict(exception = TB.format_exception_only(exty, exval),
                   traceback = TB.extract_tb(extb),
                   PARAM = sorted(PARAM),
                   COOKIE = sorted(COOKIE.items()),
                   PATH = PATH,
                   ENV = sorted(ENV.items()))
    with tmplkw(**details):
      if not OUT.headerp:
        page('exception.fhtml',
             header = dict(status = 500),
             title = 'Chopwood: internal error',
             toplevel = True)
      else:
        format_tmpl(TMPL['exception.fhtml'], toplevel = False)
326
327 ###--------------------------------------------------------------------------
328 ### CGI input.
329
## Lots of global variables to be filled in by `cgiparse'.

## The request method.
METHOD = None

## Cookies, as a dictionary mapping names to values.
COOKIE = {}

## Special query parameters: only keys already present here are recognized.
SPECIAL = {}

## The ordinary query parameters, as a list of (KEY, VALUE) pairs and as a
## dictionary (which omits repeated parameters).
PARAM = []
PARAMDICT = {}

## The components of the trailing path.
PATH = []

## Whether the connection is carried over SSL or TLS.
SSLP = False

## Called by `HTTPOutput.header' once the header for a `HEAD' request has
## been written; does nothing by default.
HEADER_DONE = lambda: None

## Regular expressions for splitting apart query and cookie strings.
R_QSPLIT = RX.compile(r'[;&]')
R_CSPLIT = RX.compile(r';')
343
def split_keyvalue(string, delim, default):
  """
  Split a STRING, and generate the resulting KEY=VALUE pairs.

  The string is split at DELIM (a compiled regular expression); the
  components are parsed into KEY[=VALUE] pairs.  The KEYs and VALUEs are
  stripped of leading and trailing whitespace, and form-url-decoded.  If the
  VALUE is omitted, then the DEFAULT is used unless the DEFAULT is `None' in
  which case the component is simply ignored.  Components whose KEY is empty
  are ignored too.
  """
  for item in delim.split(string):
    k, eq, v = item.partition('=')
    if not eq:
      ## There's no `=', so there's no value.
      if default is None: continue
      v = default
    k = k.strip()
    v = v.strip()
    if k:
      yield urldecode(k), urldecode(v)
364
def cgiparse():
  """
  Process all of the various exciting CGI environment variables.

  We read environment variables and populate some tables left in global
  variables: it's all rather old-school.  Variables set are as follows.

  `METHOD'
        The request method, from `REQUEST_METHOD'.

  `COOKIE'
        A dictionary mapping cookie names to the values provided by the user
        agent.

  `SPECIAL'
        A dictionary holding some special query parameters which are of
        interest at a global level, and should not be passed to a subcommand
        handler.  No new entries will be added to this dictionary, though
        values will be modified to reflect the query parameters discovered.
        Conventionally, such parameters have names beginning with `%'.

  `PARAM'
        The query parameters as a list of (KEY, VALUE) pairs.  Special
        parameters are omitted.

  `PARAMDICT'
        The query parameters as a dictionary.  Special parameters, and
        parameters which appear more than once, are omitted.

  `PATH'
        The trailing `PATH_INFO' path, split at `/' markers, with any
        trailing empty component removed.

  `SSLP'
        True if the client connection is carried over SSL or TLS.

  For `GET' and `HEAD' requests the query comes from `QUERY_STRING'; for
  `POST' it is read from standard input.  An `ExpectedError' with code 500
  is raised if a required variable is missing, the request method is not
  one of these three, or a `POST' body has the wrong length or media type.
  """

  global METHOD, SSLP

  def getenv(var):
    """Return the environment variable VAR, which must be present."""
    try: return ENV[var]
    except KeyError: raise U.ExpectedError(500, "No `%s' supplied" % var)

  ## Yes, we want the request method.
  METHOD = getenv('REQUEST_METHOD')

  ## Acquire the query string.
  if METHOD in ['GET', 'HEAD']:
    q = ENV.get('QUERY_STRING', '')

  elif METHOD == 'POST':

    ## We must read the query string from stdin.
    n = getenv('CONTENT_LENGTH')
    if not n.isdigit():
      raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
    n = int(n, 10)

    ## Media types are case-insensitive, and may be followed by parameters
    ## (e.g., `; charset=UTF-8'): compare only the bare type.
    ct = getenv('CONTENT_TYPE')
    mediatype = ct.split(';', 1)[0].strip().lower()
    if mediatype != 'application/x-www-form-urlencoded':
      raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)
    q = SYS.stdin.read(n)
    if len(q) != n:
      raise U.ExpectedError(500, "Failed to read correct length")

  else:
    raise U.ExpectedError(500, "Unexpected request method `%s'" % METHOD)

  ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.  A parameter
  ## given without a value gets the value `t'.
  seen = set()
  for k, v in split_keyvalue(q, R_QSPLIT, 't'):
    if k in SPECIAL:
      SPECIAL[k] = v
    else:
      PARAM.append((k, v))
      if k in seen:
        ## A repeat: make sure it's not in the dictionary (it may have been
        ## removed already).
        PARAMDICT.pop(k, None)
      else:
        PARAMDICT[k] = v
        seen.add(k)

  ## Parse out the cookies, if any.  Components without a value are ignored.
  try: c = ENV['HTTP_COOKIE']
  except KeyError: pass
  else:
    for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

  ## Set up the `PATH'.
  try: p = ENV['PATH_INFO']
  except KeyError: pass
  else:
    pp = p.lstrip('/').split('/')
    if pp and not pp[-1]: pp.pop()
    PATH[:] = pp

  ## Check the crypto for the connection.
  if ENV.get('SSL_PROTOCOL'):
    SSLP = True
460
461 ###--------------------------------------------------------------------------
462 ### CGI subcommands.
463
class Subcommand (SC.Subcommand):
  """
  A CGI subcommand object.

  As for `subcommand.Subcommand', but with additional protocol for processing
  CGI parameters.
  """

  def __init__(me, name, contexts, desc, func,
               methods = ('GET', 'POST'), *args, **kw):
    """
    Constructor: as for the base class, except for METHODS.

    METHODS is a collection of the HTTP request methods by which this
    subcommand may be invoked; it is kept as a set in the `methods'
    attribute.  The other arguments are passed to the base class.
    """
    super(Subcommand, me).__init__(name, contexts, desc, func, *args, **kw)
    me.methods = set(methods)

  def cgi(me, param, path):
    """
    Invoke the subcommand given a collection of CGI parameters.

    PARAM is a list of (KEY, VALUE) pairs from the CGI query.  The CGI query
    parameters are checked against the subcommand's parameters (making sure
    that mandatory parameters are supplied, that switches are converted to
    boolean values, and that only the `rest' parameter, if any, is
    duplicated).  Query parameters which the subcommand doesn't recognize
    are rejected, unless their values are empty, in which case they are
    ignored.

    PATH is a list of trailing path components.  They are used to satisfy the
    `rest' parameter if there is one and there are no query parameters which
    satisfy the `rest' parameter; otherwise, an `ExpectedError' is raised if
    the list of path elements is non-empty.

    The request method (the global `METHOD') must be one of the subcommand's
    permitted methods, with `HEAD' counting as `GET'.  Problems are reported
    by raising `ExpectedError'.  If all is well, the subcommand's function is
    called with the collected keyword arguments.
    """

    ## We're going to make a pass over the supplied parameters, and we'll
    ## check them off against the formal parameters as we go; so we'll need
    ## to be able to look them up.
    ##
    ## To that end: `want' is a dictionary mapping parameter names to
    ## functions which will do something useful with the value; and `kw' is
    ## going to be the keyword-argument dictionary we pass to the handler
    ## function.  The latter also tells us which parameters we've seen, so
    ## that we can make sure that all of the mandatory parameters were
    ## actually supplied.
    want = {}
    kw = {}

    ## Check the request method against the permitted list.
    meth = METHOD
    if meth == 'HEAD': meth = 'GET'
    if meth not in me.methods:
      raise U.ExpectedError(500, "Unexpected request method `%s'" % METHOD)

    def set_value(k, v):
      """Set a simple value: we shouldn't see multiple values."""
      if k in kw:
        raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
      kw[k] = v
    def set_bool(k, v):
      """Set a simple boolean value: for switches."""
      set_value(k, v.lower() in ['true', 't', 'yes', 'y'])
    def set_list(k, v):
      """Append the value to a list: for the `rest' parameter."""
      kw.setdefault(k, []).append(v)

    ## Set up the `want' map.  Options which take an argument are simple
    ## values; those which don't are switches.
    for o in me.opts:
      if o.argname: want[o.name] = set_value
      else: want[o.name] = set_bool
    for p in me.params: want[p.name] = set_value
    for p in me.oparams: want[p.name] = set_value
    if me.rparam: want[me.rparam.name] = set_list

    ## Work through the list of supplied parameters.
    for k, v in param:
      try:
        f = want[k]
      except KeyError:
        ## Unknown parameters are tolerated only if they have no value.
        if v:
          raise U.ExpectedError(400, "Unexpected parameter `%s'" % k)
      else:
        f(k, v)

    ## Deal with a path, if there is one.
    if path:
      if me.rparam and me.rparam.name not in kw:
        kw[me.rparam.name] = path
      else:
        raise U.ExpectedError(404, "Superfluous path elements")

    ## Make sure we saw all of the mandatory parameters.
    for p in me.params:
      if p.name not in kw:
        raise U.ExpectedError(400, "Missing parameter `%s'" % p.name)

    ## Invoke the subcommand.
    me.func(**kw)
558
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
  """
  Decorator for defining CGI subcommands.

  This is `SC.subcommand', except that the subcommand class CLS defaults to
  the CGI-aware `Subcommand' defined here.  All of the arguments are passed
  on, and the result of `SC.subcommand' is returned.
  """
  positional = (name, contexts, desc) + args
  keywords = dict(kw, cls = cls)
  return SC.subcommand(*positional, **keywords)
562
563 ###----- That's all, folks --------------------------------------------------