progs/perftest.c: Use from Glibc syscall numbers.

[catacomb] / symm / multigen
diff --git a/symm/multigen b/symm/multigen

index 2d626c2..8af0f10 100755 (executable)
--- a/symm/multigen
+++ b/symm/multigen
@@ -30,52 +30,131 @@ import itertools as IT
  import optparse as OP
  import os as OS
  import re as RX
-from cStringIO import StringIO
+import sys as SYS
+if SYS.version_info >= (3,): from io import StringIO
+else: from cStringIO import StringIO
  from sys import argv, exit, stderr
  
  ###--------------------------------------------------------------------------
  ### Utilities.
  
-QUIS = OS.path.basename(argv[0])
+QUIS = OS.path.basename(argv[0])        # Program name, for use in errors.
  
  def die(msg):
+  """Report MSG as a fatal error, and exit."""
    stderr.write('%s: %s\n' % (QUIS, msg))
    exit(1)
  
  def indexed(seq):
+  """
+  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.
+  """
    return IT.izip(IT.count(), seq)
  
+if SYS.version_info >= (3,):
+  def func_name(func): return func.__name__
+  IT.izip = zip
+else:
+  def func_name(func): return func.func_name
+
+try: next
+except NameError:
+  def next(obj): return obj.next()
+
  ###--------------------------------------------------------------------------
  ### Reading the input values.
  
+## Map column names to (Relation, column index) pairs.
  COLMAP = {}
  
  class Cursor (object):
+  """
+  A Cursor object keeps track of an iteration through a Relation.
+
+  At any time, the Cursor has a `current' row; the individual cells of this
+  row may be retrieved using Python's standard indexing operator.  The `step'
+  method advances to the next row (if there is one).  The `reset' method
+  returns to row zero.
+  """
+
    def __init__(me, rel):
+    """
+    Initialize a new Cursor object, tracking its way through a Relation REL.
+
+    The new Cursor has row zero as its current row.  The REL must not be
+    empty.
+    """
      me._rel = rel
-    me._i = 0
-    me._row = rel[0]
+    me.reset()
+
    def step(me):
+    """
+    Advance the Cursor to the next row.
+
+    Returns False if there is no next row; otherwise True.
+    """
      me._i += 1
      if me._i >= len(me._rel):
        me._i = me._row = None
        return False
      me._row = me._rel[me._i]
      return True
+
    def reset(me):
+    """
+    Reset the Cursor, so that row zero is current again.
+    """
      me._i = 0
      me._row = me._rel[0]
+
    def __getitem__(me, i):
+    """
+    Return the item in column I of the Cursor's current row.
+
+    The index must be acceptable to the underlying row object, but otherwise
+    the Cursor imposes no restrictions.  Indices need not be numeric, for
+    example.
+    """
      return me._row[i]
+
    def __repr__(me):
+    """
+    Return a text description of the Cursor, for diagnostic use.
+    """
      return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
  
  class CursorSet (object):
+  """
+  A CursorSet iterates over the cartesian product of a number of Relations.
+
+  More precisely: it maintains a stack, each level of which tracks a number
+  of Relations.  More Relations can be pushed onto this stack with the `push'
+  method, and removed with `pop'.  The `step' method advances through the
+  cartesian product of the Relations in the top level of the stack -- the
+  `active' Relations.  Columns from the current rows of all of the currently
+  known Relations -- whether active or not -- can be extracted using `get'.
+  """
+
    def __init__(me):
+    """
+    Initialize a new CursorSet object.
+
+    A new CursorSet has an empty stack.
+    """
      me._map = {}
      me._stack = []
      me._act = None
+
    def push(me, rels):
+    """
+    Push the new Relations RELS onto the stack and start iterating.
+
+    The currently active Relations are pushed down.  Those Relations which are
+    not already known to the CursorSet become the newly active collection.
+    (Relations which are already known are simply ignored.)
+
+    Iteration traverses Relations on the right more rapidly.
+    """
      cc = []
      rr = []
      for r in rels:
@@ -85,7 +164,14 @@ class CursorSet (object):
        cc.append(c)
      me._stack.append((me._act, rr))
      me._act = cc
+
    def step(me):
+    """
+    Advance the CursorSet through the currently active Relations.
+
+    Return False if the active Relations have now been exhausted; otherwise
+    return True.
+    """
      i = 0
      while i < len(me._act):
        if me._act[i].step(): return True
@@ -93,38 +179,92 @@ class CursorSet (object):
        me._act[i].reset()
        i += 1
      return False
+
    def pop(me):
+    """
+    Pop the active Relations.
+
+    Return to iterating over the previously active collection.
+    """
      me._act, rels = me._stack.pop()
      for r in rels: del me._map[r]
+
    def get(me, rel, i):
+    """
+    Return the item with index I in the current row of Relation REL.
+    """
      return me._map[rel][i]
  
  class Relation (object):
+  """
+  A Relation keeps track of a table of data.
+
+  A Relation consists of a `header', which is a sequence of string names,
+  and a rectangular array of data, each row of which has the same number of
+  items as the header.
+
+  Relations can be iterated over using Cursors and CursorSets.
+  """
+
    def __init__(me, head):
+    """
+    Initialize a new, empty Relation with header HEAD.
+
+    The `COLMAP' dictionary is updated to map the names in the header to this
+    Relation and its column indices.
+    """
      me._head = head
      me._rows = []
      for i, c in indexed(head): COLMAP[c] = me, i
+
    def addrow(me, row):
+    """
+    Add a ROW to the Relation.
+
+    The new row must have the correct number of entries.
+    """
      if len(row) != len(me._head):
        die("mismatch: row `%s' doesn't match heading `%s'" %
-          (', '.join(row), ', '.join(head)))
+          (', '.join(row), ', '.join(me._head)))
      me._rows.append(row)
+
    def __len__(me):
+    """Return the number of rows in the Relation."""
      return len(me._rows)
+
    def __getitem__(me, i):
+    """Return the Ith row of the Relation."""
      return me._rows[i]
+
    def __repr__(me):
+    """Return a textual description of the Relation, for diagnostic use."""
      return '#<Relation %r>' % me._head
  
  def read_immediate(word):
+  """
+  Construct a Relation by parsing WORD, registering its columns in `COLMAP'.
+
+  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
+  comma-separated lists of strings which will form the relation's header and
+  rows respectively.  There is no way to include an item which contains a
+  comma or whitespace.
+  """
    head, rels = word.split('=', 1)
    rel = Relation([c.strip() for c in head.split(',')])
    for row in rels.split(): rel.addrow([c.strip() for c in row.split(',')])
  
  def read_file(spec):
+  """
+  Construct a Relation from a file, according to SPEC.
+
+  The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
+  comma-separated list of strings to form the relation's header.  Each line
+  from the file which is neither empty nor begins with `#' is split into
+  whitespace-separated words to form a row in the relation.  There is no way
+  to include an item which contains whitespace.
+  """
    file, head = spec.split(':', 1)
    rel = Relation([c.strip() for c in head.split(',')])
-  cols = [c.strip() for c in head.split(',')]
    with open(file) as f:
      for line in f:
        line = line.strip()
@@ -132,6 +272,13 @@ def read_file(spec):
        rel.addrow(line.split())
  
  def read_thing(spec):
+  """
+  Construct a relation from SPEC.
+
+  If SPEC begins with `@' then read the relation from a file (see
+  `read_file'); otherwise interpret it as immediate data (see
+  `read_immediate').
+  """
    if spec.startswith('@'): read_file(spec[1:])
    else: read_immediate(spec)
  
@@ -139,64 +286,153 @@ def read_thing(spec):
  ### Template structure.
  
  class BasicTemplate (object):
+  """
+  Base class for template objects.
+
+  The protocol for templates consists of two methods:
+
+  relations()           Return a set of Relations mentioned at top-level in
+                        substitutions in the template.
+
+  subst(OUT, CS)        Fill in the template, writing the output to the
+                        stream OUT.  The CS is a CursorSet object tracking
+                        the current iteration state.
+  """
    pass
  
  class LiteralTemplate (BasicTemplate):
+  """
+  A LiteralTemplate outputs a fixed string.
+  """
+
    def __init__(me, text, **kw):
+    """
+    Initialize a new LiteralTemplate object.  TEXT is the text to be written.
+    """
      super(LiteralTemplate, me).__init__(**kw)
      me._text = text
+
    def relations(me):
+    """A LiteralTemplate contains no substitutions."""
      return set()
+
    def subst(me, out, cs):
+    """A LiteralTemplate just emits its text."""
      out.write(me._text)
+
    def __repr__(me):
      return '#<LiteralTemplate %r>' % me._text
  
  class TagTemplate (BasicTemplate):
+  """
+  A TagTemplate object expands a substitution tag.
+
+  It extracts an item from the current row of a relation, processes it
+  according to an operation, and outputs the result.
+  """
+
    def __init__(me, rel, i, op, **kw):
+    """
+    Initialize a new TagTemplate object.
+
+    REL is the relation from which to pick the output; I is the column index;
+    OP is a transformation to apply to the data, and may be None to indicate
+    that the data should not be transformed.
+    """
      super(TagTemplate, me).__init__(**kw)
      me._rel = rel
      me._i = i
      me._op = op
+
    def relations(me):
+    """The TagTemplate knows which relation it uses."""
      return set([me._rel])
+
    def subst(me, out, cs):
+    """
+    A TagTemplate extracts and transforms an item from the current row of
+    a relation.
+    """
      val = cs.get(me._rel, me._i)
      if me._op is not None: val = me._op(val)
      out.write(val)
+
    def __repr__(me):
      return '#<TagTemplate %s>' % me._rel._head[me._i]
  
  class SequenceTemplate (BasicTemplate):
+  """
+  A SequenceTemplate concatenates a number of other templates.
+  """
+
    def __new__(cls, seq, **kw):
+    """
+    Construct a template from a sequence SEQ of other templates.
+
+    If SEQ is a singleton (which it often is) then return it directly;
+    otherwise construct a SequenceTemplate.
+    """
      if len(seq) == 1:
        return seq[0]
      else:
-      me = super(SequenceTemplate, cls).__new__(cls, seq = seq, **kw)
-      tt = []
-      cls = type(me)
-      for t in seq:
-        if isinstance(t, cls): tt += t._seq
-        else: tt.append(t)
-      me._seq = tt
-      return me
+      return super(SequenceTemplate, cls).__new__(cls, **kw)
+
    def __init__(me, seq, **kw):
+    """
+    Initialize a new SequenceTemplate object from SEQ.
+
+    The sequence is flattened out: if SEQ contains SequenceTemplates then we
+    use their children directly, so that we don't have a useless tree.
+    """
      super(SequenceTemplate, me).__init__(**kw)
+    tt = []
+    cls = type(me)
+    for t in seq:
+      if isinstance(t, cls): tt += t._seq
+      else: tt.append(t)
+    me._seq = tt
+
    def relations(me):
+    """
+    The relations of a SequenceTemplate are the union of the relations of its
+    children.
+    """
      rr = set()
      for t in me._seq: rr.update(t.relations())
      return rr
+
    def subst(me, out, cs):
+    """
+    The output of a SequenceTemplate is the concatenation of the expansions
+    of its children.
+    """
      for t in me._seq: t.subst(out, cs)
+
    def __repr__(me):
      return '#<SequenceTemplate %r>' % me._seq
  
  class RepeatTemplate (BasicTemplate):
+  """
+  A RepeatTemplate iterates its body over a number of relations.
+  """
+
    def __init__(me, sub):
+    """
+    Initialize a new RepeatTemplate, given a template to act as its body.
+    """
      me._sub = sub
+
    def relations(me):
+    """
+    A RepeatTemplate hides the relations of its body.
+    """
      return set()
+
    def subst(me, out, cs):
+    """
+    Substitute a RepeatTemplate, by iterating over the relations mentioned in
+    its body template.
+    """
      rr = me._sub.relations()
      for r in rr:
        if len(r) == 0: return
@@ -205,6 +441,7 @@ class RepeatTemplate (BasicTemplate):
        me._sub.subst(out, cs)
        if not cs.step(): break
      cs.pop()
+
    def __repr__(me):
      return '#<RepeatTemplate %r>' % me._sub
  
@@ -212,109 +449,249 @@ class RepeatTemplate (BasicTemplate):
  ### Some slightly cheesy parsing machinery.
  
  class ParseState (object):
+  """
+  A ParseState object keeps track of a parser's position in a file.
+
+  The `curr' slot contains the current line under consideration.
+  """
+
    def __init__(me, file, text):
+    """
+    Initialize a ParseState object.
+
+    The FILE is a string naming the source file, and the TEXT is a string
+    holding the file's entire contents, which is split into lines here.
+    """
      me._file = file
      me._i = 0
      me._it = iter(text.splitlines(True))
      me.step()
+
    def step(me):
-    me.curr = next(me._it, None)
-    if me.curr is not None: me._i += 1
+    """
+    Advance the ParseState to the next line.
+
+    Sets `curr' to the next line, or to None if the input is exhausted.
+    """
+    try: me.curr = next(me._it)
+    except StopIteration: me.curr = None
+    else: me._i += 1
+
    def error(me, msg):
+    """
+    Report a fatal error during parsing, attributing it to the current line.
+    """
      die('%s:%d: %s' % (me._file, me._i, msg))
  
  class token (object):
+  """
+  A token object has no interesting properties other than its identity.
+  """
+
    def __init__(me, name):
+    """Initialize a new token, with the given NAME."""
      me._name = name
    def __repr__(me):
+    """Return a description of the token, for diagnostic purposes."""
      return '#<%s>' % me._name
  
+## Some magical tokens useful during parsing.
  EOF = token('eof')
  END = token('end')
  
+## Regular expressions matching substitution tags.
  R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
  R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
  
+## A dictionary mapping operation names to functions which implement them.
  OPMAP = {}
  
  def defop(func):
-  name = func.func_name
+  """
+  Decorator for substitution operator functions.
+
+  Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
+  name, removing a prefix `op_' if there is one.
+
+  An operator function is given the raw value as an argument and should
+  return the transformed value.
+  """
+  name = func_name(func)
    if name.startswith('op_'): name = name[3:]
    OPMAP[name] = func
    return func
  
  @defop
-def op_u(val): return val.upper()
+def op_u(val):
+  """@{COLUMN:u} -- the item in upper case."""
+  return val.upper()
+
+@defop
+def op_l(val):
+  """@{COLUMN:l} -- the item in lower case."""
+  return val.lower()
  
  @defop
-def op_l(val): return val.lower()
+def op_f(val):
+  """@{COLUMN:f} -- the item, with `/' characters replaced by `-'."""
+  return val.replace('/', '-')
  
  R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
  @defop
-def op_c(val): return R_NOTIDENT.sub('_', val)
+def op_c(val):
+  """
+  @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
+  """
+  return R_NOTIDENT.sub('_', val)
  
  def _pairify(val):
+  """
+  Split VAL into two, at an `=' sign.
+
+  If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
+  otherwise return (VAL, VAL).
+  """
    c = val.find('=')
    if c >= 0: return val[:c], val[c + 1:]
    else: return val, val
  
  @defop
-def op_left(val): return _pairify(val)[0]
+def op_left(val):
+  """@{COLUMN:left} -- the left-hand side of the item."""
+  return _pairify(val)[0]
  @defop
-def op_right(val): return _pairify(val)[1]
+def op_right(val):
+  """@{COLUMN:right} -- the right-hand side of the item."""
+  return _pairify(val)[1]
  
  def parse_text(ps):
+  """
+  Parse a chunk of text from a ParseState.
+
+  Stop when we get to something which looks like a template keyword, but
+  extract tags.  Return the resulting template.
+
+  Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'.  The text may
+  contain comments beginning `%#', which are ignored, and lines beginning
+  `%%' which have the initial `%' removed and are otherwise treated as normal
+  text (and, in particular, may contain tags).  Other lines beginning with
+  `%' are directives and must be processed by our caller.
+  """
+
+  ## Starting out: no templates collected, and an empty buffer of literal
+  ## text.
    tt = []
    lit = StringIO()
+
    def spill():
+    ## Spill accumulated literal text from `lit' into a LiteralTemplate
+    ## object.
      l = lit.getvalue()
      if l: tt.append(LiteralTemplate(l))
-    lit.reset()
+    lit.seek(0)
      lit.truncate()
+
+  ## Iterate over the lines of input.
    while True:
      line = ps.curr
+
+    ## Stop if there's no more text; handle lines beginning with `%'.
      if line is None: break
      elif line.startswith('%'):
        if line.startswith('%#'): ps.step(); continue
        elif line.startswith('%%'): line = line[1:]
        else: break
+
+    ## Work through the line, finding tags.
      i = 0
      while True:
+
+      ## If there are no more `@' signs, there can be no more tags, and we're
+      ## done.
        j = line.find('@', i)
        if j < 0: break
+
+      ## Write the chunk we've found.
        lit.write(line[i:j])
+
+      ## If the next character is also `@' then this is an escape and we
+      ## should carry on.
+      if line[j:].startswith('@@'):
+        lit.write('@')
+        i = j + 2
+        continue
+
+      ## Parse the tag into a column name, and maybe some operators.
        m = R_SIMPLETAG.match(line, j)
        if not m: m = R_COMPLEXTAG.match(line, j)
        if not m: ps.error('invalid tag')
        col = m.group(1)
        try: rel, i = COLMAP[col]
        except KeyError: ps.error("unknown column `%s'" % col)
-      wholeop = None
        ops = m.lastindex >= 2 and m.group(2)
+
+      ## If we have operators then look them up and compose them.
+      wholeop = None
        if ops:
          for opname in ops[1:].split(':'):
            try: op = OPMAP[opname]
            except KeyError: ps.error("unknown operation `%s'" % opname)
            if wholeop is None: wholeop = op
            else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
+
+      ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
+      ## for the tag.
        spill()
        tt.append(TagTemplate(rel, i, wholeop))
+
+      ## Continue from after the tag.
        i = m.end()
+
+    ## Finished a line.  Write out the remainder of the line and move onto
+    ## the next.
      lit.write(line[i:])
      ps.step()
+
+  ## Run out of things to do.  Flush out the rest of the literal text and
+  ## combine the templates.
    spill()
    return SequenceTemplate(tt)
  
+## A list of (compiled regular expression, directive-processing function) pairs.
  DIRECT = []
  
  def direct(rx):
+  """
+  Function decorator for template file directives.
+
+  Associate the regular expression RX with the function in `DIRECT'.
+  Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
+  and M is the match object resulting from matching RX against the directive
+  text.
+  """
    def _(func):
      DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
      return func
    return _
  
  def parse_template(ps):
+  """
+  Parse a single template from the ParseState PS.
+
+  A single template is either a chunk of text (parsed by `parse_text') or a
+  directive (handled by the appropriate function in `DIRECT').
+
+  Returns either a template object, or a special token.  In particular, `EOF'
+  is returned if we run out of text; directives may return other tokens.
+  """
+
+  ## Skip initial comments.  Otherwise we might end up with an empty
+  ## SequenceTemplate here.
    while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
+
+  ## If we've run out of input, return `EOF' here.  A line beginning `%%', or
+  ## not beginning `%', means we've found a chunk of text.  Otherwise find
+  ## the right directive handler.
    if ps.curr is None: return EOF
    elif ps.curr.startswith('%'):
      if ps.curr.startswith('%%'): return parse_text(ps)
@@ -329,6 +706,16 @@ def parse_template(ps):
      return parse_text(ps)
  
  def parse_templseq(ps, nestp):
+  """
+  Parse a sequence of templates from the ParseState PS.
+
+  Calls `parse_template' repeatedly.  If NESTP is true, then an `END' token
+  (presumably from a directive handler) is permitted and halts parsing;
+  otherwise `END' signifies an error.
+
+  Returns a template object.
+  """
+
    tt = []
    while True:
      t = parse_template(ps)
@@ -343,13 +730,25 @@ def parse_templseq(ps, nestp):
  
  @direct(r'repeat')
  def dir_repeat(ps, m):
+  """
+  %repeat
+  BODY
+  %end
+
+  Iterate the body over the cartesian product of the relations mentioned
+  within.
+  """
    return RepeatTemplate(parse_templseq(ps, True))
  
  @direct(r'end')
  def dir_end(ps, m):
+  """%end -- an end marker used to delimit chunks of template."""
    return END
  
  def compile_template(file, text):
+  """
+  Compile TEXT into a template, attributing errors to FILE.
+  """
    ps = ParseState(file, text)
    t = parse_templseq(ps, False)
    return t
@@ -359,17 +758,21 @@ def compile_template(file, text):
  
  op = OP.OptionParser(
    description = 'Generates files by filling in simple templates',
-  usage = 'usage: %prog [-gl] FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
+  usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
    version = 'Catacomb version @VERSION@')
+def cb_gen(opt, optstr, arg, op):
+  op.values.input = arg
+  op.values.mode = 'gen'
  for short, long, kw in [
    ('-l', '--list', dict(
        action = 'store_const', const = 'list', dest = 'mode',
        help = 'list filenames generated')),
    ('-g', '--generate', dict(
-      action = 'store', metavar = 'PATH', dest = 'input',
-      help = 'generate output (default)'))]:
+      action = 'callback', metavar = 'TEMPLATE',
+      callback = cb_gen, type = 'string',
+      help = 'generate file(s) from TEMPLATE file'))]:
    op.add_option(short, long, **kw)
-op.set_defaults(mode = 'gen')
+op.set_defaults(mode = 'what?')
  opts, args = op.parse_args()
  
  if len(args) < 1: op.error('missing FILE')
@@ -378,6 +781,9 @@ for rel in args[1:]: read_thing(rel)
  filetempl = compile_template('<output>', filepat)
  
  def filenames(filetempl):
+  """
+  Generate the filenames in the compiled filename template FILETEMPL.
+  """
    cs = CursorSet()
    rr = filetempl.relations()
    for r in rr:
@@ -390,8 +796,9 @@ def filenames(filetempl):
      if not cs.step(): break
    cs.pop()
  
+## Main dispatch.
  if opts.mode == 'list':
-  for file, cs in filenames(filetempl): print file
+  for file, cs in filenames(filetempl): print(file)
  elif opts.mode == 'gen':
    with open(opts.input) as f:
      templ = RepeatTemplate(compile_template(opts.input, f.read()))
@@ -401,6 +808,6 @@ elif opts.mode == 'gen':
        templ.subst(out, cs)
      OS.rename(new, file)
  else:
-  raise Exception, 'What am I doing here?'
+  die('What am I doing here?')
  
  ###----- That's all, folks --------------------------------------------------