X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/28ffcb2af24deaf7d77f62b48246b768e16c6a07..HEAD:/symm/multigen

diff --git a/symm/multigen b/symm/multigen
index d532cda7..8af0f108 100755
--- a/symm/multigen
+++ b/symm/multigen
@@ -30,52 +30,131 @@ import itertools as IT
 import optparse as OP
 import os as OS
 import re as RX
-from cStringIO import StringIO
+import sys as SYS
+if SYS.version_info >= (3,): from io import StringIO
+else: from cStringIO import StringIO
 from sys import argv, exit, stderr
 
 ###--------------------------------------------------------------------------
 ### Utilities.
 
-QUIS = OS.path.basename(argv[0])
+QUIS = OS.path.basename(argv[0])        # Program name, for use in errors.
 
 def die(msg):
+  """Report MSG as a fatal error, and exit."""
   stderr.write('%s: %s\n' % (QUIS, msg))
   exit(1)
 
 def indexed(seq):
+  """
+  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.
+  """
   return IT.izip(IT.count(), seq)
 
+if SYS.version_info >= (3,):
+  def func_name(func): return func.__name__
+  IT.izip = zip
+else:
+  def func_name(func): return func.func_name
+
+try: next
+except NameError:
+  def next(obj): return obj.next()
+
 ###--------------------------------------------------------------------------
 ### Reading the input values.
 
+## Map column names to (Relation, column index) pairs.
 COLMAP = {}
 
 class Cursor (object):
+  """
+  A Cursor object keeps track of an iteration through a Relation.
+
+  At any time, the Cursor has a `current' row; the individual cells of this
+  row may be retrieved using Python's standard indexing operator.  The `step'
+  method advances to the next row (if there is one).  The `reset' method
+  returns to row zero.
+  """
+
   def __init__(me, rel):
+    """
+    Initialize a new Cursor object, tracking its way through a Relation REL.
+
+    The new Cursor has row zero as its current row.  The REL must not be
+    empty.
+    """
     me._rel = rel
-    me._i = 0
-    me._row = rel[0]
+    me.reset()
+
   def step(me):
+    """
+    Advance the Cursor to the next row.
+
+    Returns False if there is no next row; otherwise True.
+    """
     me._i += 1
     if me._i >= len(me._rel):
       me._i = me._row = None
       return False
     me._row = me._rel[me._i]
     return True
+
   def reset(me):
+    """
+    Reset the Cursor, so that row zero is current again.
+    """
     me._i = 0
     me._row = me._rel[0]
+
   def __getitem__(me, i):
+    """
+    Return the item in column I of the Cursor's current row.
+
+    The index must be acceptable to the underlying row object, but otherwise
+    the Cursor imposes no restrictions.  Indices need not be numeric, for
+    example.
+    """
     return me._row[i]
+
   def __repr__(me):
+    """
+    Return a text description of the Cursor, for diagnostic use.
+    """
     return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
 
 class CursorSet (object):
+  """
+  A CursorSet iterates over the cartesian product of a number of Relations.
+
+  More precisely: it maintains a stack, each level of which tracks a number
+  of Relations.  More Relations can be pushed onto this stack with the `push'
+  method, and removed with `pop'.  The `step' method advances through the
+  cartesian product of the Relations in the top level of the stack -- the
+  `active' Relations.  Columns from the current rows of all of the currently
+  known Relations -- whether active or not -- can be extracted using `get'.
+  """
+
   def __init__(me):
+    """
+    Initialize a new CursorSet object.
+
+    A new CursorSet has an empty stack.
+    """
     me._map = {}
     me._stack = []
     me._act = None
+
   def push(me, rels):
+    """
+    Push the new Relations RELS onto the stack and start iterating.
+
+    The currently active Relations are pushed down.  Those Relations which are
+    not already known to the CursorSet become the newly active collection.
+    (Relations which are already known are simply ignored.)
+
+    Iteration traverses Relations on the right more rapidly.
+    """
     cc = []
     rr = []
     for r in rels:
@@ -85,7 +164,14 @@ class CursorSet (object):
       cc.append(c)
     me._stack.append((me._act, rr))
     me._act = cc
+
   def step(me):
+    """
+    Advance the CursorSet through the currently active Relations.
+
+    Return False if the active Relations have now been exhausted; otherwise
+    return True.
+    """
     i = 0
     while i < len(me._act):
       if me._act[i].step(): return True
@@ -93,38 +179,92 @@ class CursorSet (object):
       me._act[i].reset()
       i += 1
     return False
+
   def pop(me):
+    """
+    Pop the active Relations.
+
+    Return to iterating over the previously active collection.
+    """
     me._act, rels = me._stack.pop()
     for r in rels: del me._map[r]
+
   def get(me, rel, i):
+    """
+    Return the item with index I in the current row of Relation REL.
+    """
     return me._map[rel][i]
 
 class Relation (object):
+  """
+  A Relation keeps track of a table of data.
+
+  A Relation consists of a `header', which is a sequence of string names,
+  and a rectangular array of data, each row of which has the same number of
+  items as the header.
+
+  Relations can be iterated over using Cursors and CursorSets.
+  """
+
   def __init__(me, head):
+    """
+    Initialize a new, empty Relation with header HEAD.
+
+    The `COLMAP' dictionary is updated to map the names in the header to this
+    Relation and its column indices.
+    """
     me._head = head
     me._rows = []
     for i, c in indexed(head): COLMAP[c] = me, i
+
   def addrow(me, row):
+    """
+    Add a ROW to the Relation.
+
+    The new row must have the correct number of entries.
+    """
     if len(row) != len(me._head):
       die("mismatch: row `%s' doesn't match heading `%s'" %
-          (', '.join(row), ', '.join(head)))
+          (', '.join(row), ', '.join(me._head)))
     me._rows.append(row)
+
   def __len__(me):
+    """Return the number of rows in the Relation."""
     return len(me._rows)
+
   def __getitem__(me, i):
+    """Return the Ith row of the Relation."""
     return me._rows[i]
+
   def __repr__(me):
+    """Return a textual description of the Relation, for diagnostic use."""
     return '#<Relation %r>' % me._head
 
 def read_immediate(word):
+  """
+  Build a Relation by parsing WORD; it is recorded in `COLMAP', not returned.
+
+  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
+  comma-separated lists of strings which will form the relation's header and
+  rows respectively.  There is no way to include an item which contains a
+  comma or whitespace.
+  """
   head, rels = word.split('=', 1)
   rel = Relation([c.strip() for c in head.split(',')])
   for row in rels.split(): rel.addrow([c.strip() for c in row.split(',')])
 
 def read_file(spec):
+  """
+  Build a Relation from a file, per SPEC; recorded in `COLMAP', not returned.
+
+  The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
+  comma-separated list of strings to form the relation's header.  Each line
+  from the file which is neither empty nor begins with `#' is split into
+  whitespace-separated words to form a row in the relation.  There is no way
+  to include an item which contains whitespace.
+  """
   file, head = spec.split(':', 1)
   rel = Relation([c.strip() for c in head.split(',')])
-  cols = [c.strip() for c in head.split(',')]
   with open(file) as f:
     for line in f:
       line = line.strip()
@@ -132,6 +272,13 @@ def read_file(spec):
       rel.addrow(line.split())
 
 def read_thing(spec):
+  """
+  Construct a relation from SPEC; the relation itself is not returned.
+
+  If SPEC begins with `@' then read the relation from a file (see
+  `read_file'); otherwise interpret it as immediate data (see
+  `read_immediate').
+  """
   if spec.startswith('@'): read_file(spec[1:])
   else: read_immediate(spec)
 
@@ -139,64 +286,153 @@ def read_thing(spec):
 ### Template structure.
 
 class BasicTemplate (object):
+  """
+  Base class for template objects.
+
+  The protocol for templates consists of two methods:
+
+  relations()           Return a set of Relations mentioned at top-level in
+                        substitutions in the template.
+
+  subst(OUT, CS)        Fill in the template, writing the output to the
+                        stream OUT.  The CS is a CursorSet object tracking
+                        the current iteration state.
+  """
   pass
 
 class LiteralTemplate (BasicTemplate):
+  """
+  A LiteralTemplate outputs a fixed string.
+  """
+
   def __init__(me, text, **kw):
+    """
+    Initialize a new LiteralTemplate object.  TEXT is the text to be written.
+    """
     super(LiteralTemplate, me).__init__(**kw)
     me._text = text
+
   def relations(me):
+    """A LiteralTemplate contains no substitutions."""
     return set()
+
   def subst(me, out, cs):
+    """A LiteralTemplate just emits its text."""
     out.write(me._text)
+
   def __repr__(me):
     return '#<LiteralTemplate %r>' % me._text
 
 class TagTemplate (BasicTemplate):
+  """
+  A TagTemplate object expands a substitution tag.
+
+  It extracts an item from the current row of a relation, processes it
+  according to an operation, and outputs the result.
+  """
+
   def __init__(me, rel, i, op, **kw):
+    """
+    Initialize a new TagTemplate object.
+
+    REL is the relation from which to pick the output; I is the column index;
+    OP is a transformation to apply to the data, and may be None to indicate
+    that the data should not be transformed.
+    """
     super(TagTemplate, me).__init__(**kw)
     me._rel = rel
     me._i = i
     me._op = op
+
   def relations(me):
+    """The TagTemplate knows which relation it uses."""
     return set([me._rel])
+
   def subst(me, out, cs):
+    """
+    A TagTemplate extracts and transforms an item from the current row of
+    a relation.
+    """
     val = cs.get(me._rel, me._i)
     if me._op is not None: val = me._op(val)
     out.write(val)
+
   def __repr__(me):
     return '#<TagTemplate %s>' % me._rel._head[me._i]
 
 class SequenceTemplate (BasicTemplate):
+  """
+  A SequenceTemplate concatenates a number of other templates.
+  """
+
   def __new__(cls, seq, **kw):
+    """
+    Construct a template from a sequence SEQ of other templates.
+
+    If SEQ is a singleton (which it often is) then return it directly;
+    otherwise construct a SequenceTemplate.
+    """
     if len(seq) == 1:
       return seq[0]
     else:
-      me = super(SequenceTemplate, cls).__new__(cls, seq = seq, **kw)
-      tt = []
-      cls = type(me)
-      for t in seq:
-        if isinstance(t, cls): tt += t._seq
-        else: tt.append(t)
-      me._seq = tt
-      return me
+      return super(SequenceTemplate, cls).__new__(cls, **kw)
+
   def __init__(me, seq, **kw):
+    """
+    Initialize a new SequenceTemplate object from SEQ.
+
+    The sequence is flattened out: if SEQ contains SequenceTemplates then we
+    use their children directly, so that we don't have a useless tree.
+    """
     super(SequenceTemplate, me).__init__(**kw)
+    tt = []
+    cls = type(me)
+    for t in seq:
+      if isinstance(t, cls): tt += t._seq
+      else: tt.append(t)
+    me._seq = tt
+
   def relations(me):
+    """
+    The relations of a SequenceTemplate are the union of the relations of its
+    children.
+    """
     rr = set()
     for t in me._seq: rr.update(t.relations())
     return rr
+
   def subst(me, out, cs):
+    """
+    The output of a SequenceTemplate is the concatenation of the expansions
+    of its children.
+    """
     for t in me._seq: t.subst(out, cs)
+
   def __repr__(me):
     return '#<SequenceTemplate %r>' % me._seq
 
 class RepeatTemplate (BasicTemplate):
+  """
+  A RepeatTemplate iterates its body over a number of relations.
+  """
+
   def __init__(me, sub):
+    """
+    Initialize a new RepeatTemplate, given a template to act as its body.
+    """
     me._sub = sub
+
   def relations(me):
+    """
+    A RepeatTemplate hides the relations of its body.
+    """
     return set()
+
   def subst(me, out, cs):
+    """
+    Substitute a RepeatTemplate, by iterating over the relations mentioned in
+    its body template.
+    """
     rr = me._sub.relations()
     for r in rr:
       if len(r) == 0: return
@@ -205,6 +441,7 @@ class RepeatTemplate (BasicTemplate):
       me._sub.subst(out, cs)
       if not cs.step(): break
     cs.pop()
+
   def __repr__(me):
     return '#<RepeatTemplate %r>' % me._sub
 
@@ -212,110 +449,249 @@ class RepeatTemplate (BasicTemplate):
 ### Some slightly cheesy parsing machinery.
 
 class ParseState (object):
+  """
+  A ParseState object keeps track of a parser's position in a file.
+
+  The `curr' slot contains the current line under consideration.
+  """
+
   def __init__(me, file, text):
+    """
+    Initialize a ParseState object.
+
+    The FILE is a string naming the source file, and the TEXT is a string
+    holding the file's entire contents, which is split into lines here.
+    """
     me._file = file
     me._i = 0
     me._it = iter(text.splitlines(True))
     me.step()
+
   def step(me):
-    try: me.curr = me._it.next()
+    """
+    Advance the ParseState to the next line.
+
+    Sets `curr' to the next line, or to None if the input is exhausted.
+    """
+    try: me.curr = next(me._it)
     except StopIteration: me.curr = None
     else: me._i += 1
+
   def error(me, msg):
+    """
+    Report a fatal error during parsing, attributing it to the current line.
+    """
     die('%s:%d: %s' % (me._file, me._i, msg))
 
 class token (object):
+  """
+  A token object has no interesting properties other than its identity.
+  """
+
   def __init__(me, name):
+    """Initialize a new token, with the given NAME."""
     me._name = name
   def __repr__(me):
+    """Return a description of the token, for diagnostic purposes."""
     return '#<%s>' % me._name
 
+## Some magical tokens useful during parsing.
 EOF = token('eof')
 END = token('end')
 
+## Regular expressions matching substitution tags.
 R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
 R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
 
+## A dictionary mapping operation names to functions which implement them.
 OPMAP = {}
 
 def defop(func):
-  name = func.func_name
+  """
+  Decorator for substitution operator functions.
+
+  Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
+  name, removing a prefix `op_' if there is one.
+
+  An operator function is given the raw value as an argument and should
+  return the transformed value.
+  """
+  name = func_name(func)
   if name.startswith('op_'): name = name[3:]
   OPMAP[name] = func
   return func
 
 @defop
-def op_u(val): return val.upper()
+def op_u(val):
+  """@{COLUMN:u} -- the item in upper case."""
+  return val.upper()
+
+@defop
+def op_l(val):
+  """@{COLUMN:l} -- the item in lower case."""
+  return val.lower()
 
 @defop
-def op_l(val): return val.lower()
+def op_f(val):
+  """@{COLUMN:f} -- the item, with `/' characters replaced by `-'."""
+  return val.replace('/', '-')
 
 R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
 @defop
-def op_c(val): return R_NOTIDENT.sub('_', val)
+def op_c(val):
+  """
+  @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
+  """
+  return R_NOTIDENT.sub('_', val)
 
 def _pairify(val):
+  """
+  Split VAL into two, at an `=' sign.
+
+  If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
+  otherwise return (VAL, VAL).
+  """
   c = val.find('=')
   if c >= 0: return val[:c], val[c + 1:]
   else: return val, val
 
 @defop
-def op_left(val): return _pairify(val)[0]
+def op_left(val):
+  """@{COLUMN:left} -- the left-hand side of the item."""
+  return _pairify(val)[0]
 @defop
-def op_right(val): return _pairify(val)[1]
+def op_right(val):
+  """@{COLUMN:right} -- the right-hand side of the item."""
+  return _pairify(val)[1]
 
 def parse_text(ps):
+  """
+  Parse a chunk of text from a ParseState.
+
+  Stop when we get to something which looks like a template keyword, but
+  extract tags.  Return the resulting template.
+
+  Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'.  The text may
+  contain comments beginning `%#', which are ignored, and lines beginning
+  `%%' which have the initial `%' removed and are otherwise treated as normal
+  text (and, in particular, may contain tags).  Other lines beginning with
+  `%' are directives and must be processed by our caller.
+  """
+
+  ## Starting out: no templates collected, and an empty buffer of literal
+  ## text.
   tt = []
   lit = StringIO()
+
   def spill():
+    ## Spill accumulated literal text from `lit' into a LiteralTemplate
+    ## object.
     l = lit.getvalue()
     if l: tt.append(LiteralTemplate(l))
-    lit.reset()
+    lit.seek(0)
     lit.truncate()
+
+  ## Iterate over the lines of input.
   while True:
     line = ps.curr
+
+    ## Stop if there's no more text; handle lines beginning with `%'.
     if line is None: break
     elif line.startswith('%'):
       if line.startswith('%#'): ps.step(); continue
       elif line.startswith('%%'): line = line[1:]
       else: break
+
+    ## Work through the line, finding tags.
     i = 0
     while True:
+
+      ## If there are no more `@' signs, there can be no more tags, and we're
+      ## done.
       j = line.find('@', i)
       if j < 0: break
+
+      ## Write the chunk we've found.
       lit.write(line[i:j])
+
+      ## If the next character is also `@' then this is an escape and we
+      ## should carry on.
+      if line[j:].startswith('@@'):
+        lit.write('@')
+        i = j + 2
+        continue
+
+      ## Parse the tag into a column name, and maybe some operators.
       m = R_SIMPLETAG.match(line, j)
       if not m: m = R_COMPLEXTAG.match(line, j)
       if not m: ps.error('invalid tag')
       col = m.group(1)
       try: rel, i = COLMAP[col]
       except KeyError: ps.error("unknown column `%s'" % col)
-      wholeop = None
       ops = m.lastindex >= 2 and m.group(2)
+
+      ## If we have operators then look them up and compose them.
+      wholeop = None
       if ops:
         for opname in ops[1:].split(':'):
           try: op = OPMAP[opname]
           except KeyError: ps.error("unknown operation `%s'" % opname)
           if wholeop is None: wholeop = op
           else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
+
+      ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
+      ## for the tag.
       spill()
       tt.append(TagTemplate(rel, i, wholeop))
+
+      ## Continue from after the tag.
       i = m.end()
+
+    ## Finished a line.  Write out the remainder of the line and move onto
+    ## the next.
     lit.write(line[i:])
     ps.step()
+
+  ## Run out of things to do.  Flush out the rest of the literal text and
+  ## combine the templates.
   spill()
   return SequenceTemplate(tt)
 
+## A list of (regular expression, directive-processing function) pairs.
 DIRECT = []
 
 def direct(rx):
+  """
+  Function decorator for template file directives.
+
+  Associate the regular expression RX with the function in `DIRECT'.
+  Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
+  and M is the match object resulting from matching RX against the directive
+  text.
+  """
   def _(func):
     DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
     return func
   return _
 
 def parse_template(ps):
+  """
+  Parse a single template from the ParseState PS.
+
+  A single template is either a chunk of text (parsed by `parse_text') or a
+  directive (handled by the appropriate function in `DIRECT').
+
+  Returns either a template object, or a special token.  In particular, `EOF'
+  is returned if we run out of text; directives may return other tokens.
+  """
+
+  ## Skip initial comments.  Otherwise we might end up with an empty
+  ## SequenceTemplate here.
   while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
+
+  ## If we've run out of input, return `EOF' here.  A line beginning `%%', or
+  ## not beginning `%', means we've found a chunk of text.  Otherwise find
+  ## the right directive handler.
   if ps.curr is None: return EOF
   elif ps.curr.startswith('%'):
     if ps.curr.startswith('%%'): return parse_text(ps)
@@ -330,6 +706,16 @@ def parse_template(ps):
     return parse_text(ps)
 
 def parse_templseq(ps, nestp):
+  """
+  Parse a sequence of templates from the ParseState PS.
+
+  Calls `parse_template' repeatedly.  If NESTP is true, then an `END' token
+  (presumably from a directive handler) is permitted and halts parsing;
+  otherwise `END' signifies an error.
+
+  Returns a template object.
+  """
+
   tt = []
   while True:
     t = parse_template(ps)
@@ -344,13 +730,25 @@ def parse_templseq(ps, nestp):
 
 @direct(r'repeat')
 def dir_repeat(ps, m):
+  """
+  %repeat
+  BODY
+  %end
+
+  Iterate the body over the cartesian product of the relations mentioned
+  within.
+  """
   return RepeatTemplate(parse_templseq(ps, True))
 
 @direct(r'end')
 def dir_end(ps, m):
+  """%end -- an end marker used to delimit chunks of template."""
   return END
 
 def compile_template(file, text):
+  """
+  Compile TEXT into a template, attributing errors to FILE.
+  """
   ps = ParseState(file, text)
   t = parse_templseq(ps, False)
   return t
@@ -360,17 +758,21 @@ def compile_template(file, text):
 
 op = OP.OptionParser(
   description = 'Generates files by filling in simple templates',
-  usage = 'usage: %prog [-gl] FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
+  usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
   version = 'Catacomb version @VERSION@')
+def cb_gen(opt, optstr, arg, op):
+  op.values.input = arg
+  op.values.mode = 'gen'
 for short, long, kw in [
   ('-l', '--list', dict(
       action = 'store_const', const = 'list', dest = 'mode',
       help = 'list filenames generated')),
   ('-g', '--generate', dict(
-      action = 'store', metavar = 'PATH', dest = 'input',
-      help = 'generate output (default)'))]:
+      action = 'callback', metavar = 'TEMPLATE',
+      callback = cb_gen, type = 'string',
+      help = 'generate file(s) from TEMPLATE file'))]:
   op.add_option(short, long, **kw)
-op.set_defaults(mode = 'gen')
+op.set_defaults(mode = 'what?')
 opts, args = op.parse_args()
 
 if len(args) < 1: op.error('missing FILE')
@@ -379,6 +781,9 @@ for rel in args[1:]: read_thing(rel)
 filetempl = compile_template('<output>', filepat)
 
 def filenames(filetempl):
+  """
+  Generate the filenames in the compiled filename template FILETEMPL.
+  """
   cs = CursorSet()
   rr = filetempl.relations()
   for r in rr:
@@ -391,8 +796,9 @@ def filenames(filetempl):
     if not cs.step(): break
   cs.pop()
 
+## Main dispatch.
 if opts.mode == 'list':
-  for file, cs in filenames(filetempl): print file
+  for file, cs in filenames(filetempl): print(file)
 elif opts.mode == 'gen':
   with open(opts.input) as f:
     templ = RepeatTemplate(compile_template(opts.input, f.read()))
@@ -402,6 +808,6 @@ elif opts.mode == 'gen':
       templ.subst(out, cs)
     OS.rename(new, file)
 else:
-  raise Exception, 'What am I doing here?'
+  die('What am I doing here?')
 
 ###----- That's all, folks --------------------------------------------------