math/mpgen, symm/multigen: Add copious commentary.

author Mark Wooding <mdw@distorted.org.uk>

Wed, 14 May 2014 20:35:40 +0000 (21:35 +0100)

committer Mark Wooding <mdw@distorted.org.uk>

Wed, 14 May 2014 20:36:18 +0000 (21:36 +0100)
author Mark Wooding <mdw@distorted.org.uk>
Wed, 14 May 2014 20:35:40 +0000 (21:35 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Wed, 14 May 2014 20:36:18 +0000 (21:36 +0100)
diff --git a/math/mpgen b/math/mpgen

index de03c3e..7d11bfd 100644 (file)
--- a/math/mpgen
+++ b/math/mpgen
@@ -36,6 +36,12 @@ from sys import stdout
  ### Random utilities.
  
  def write_header(mode, name):
+  """
+  Write a C-language file header.
+
+  The header comment identifies the processing MODE, and the NAME of the
+  output file.
+  """
    stdout.write("""\
  /* -*-c-*- GENERATED by mpgen (%s)
   *
@@ -45,19 +51,32 @@ def write_header(mode, name):
  """ % (mode, name))
  
  def write_banner(text):
+  """Write a separator banner to the output, with header TEXT."""
    stdout.write("/*----- %s %s*/\n" % (text, '-' * (66 - len(text))))
  
-class struct (object): pass
+class struct (object):
+  """
+  A struct object exists so that you can set attributes on it.
+  """
+  pass
  
  R_IDBAD = RX.compile('[^0-9A-Za-z]')
-def fix_name(name): return R_IDBAD.sub('_', name)
+def fix_name(name):
+  """Replace non-alphanumeric characters in NAME with underscores."""
+  return R_IDBAD.sub('_', name)
  
  ###--------------------------------------------------------------------------
  ### Determining the appropriate types.
  
+## A dictionary mapping type tags to subclasses of BasicIntType.
  TYPEMAP = {}
  
  class IntClass (type):
+  """
+  The IntClass is a metaclass for integer-type classes.
+
+  It associates the type class with its tag in the `TYPEMAP' dictionary.
+  """
    def __new__(cls, name, supers, dict):
      c = type.__new__(cls, name, supers, dict)
      try: TYPEMAP[c.tag] = c
@@ -65,6 +84,36 @@ class IntClass (type):
      return c
  
  class BasicIntType (object):
+  """
+  A base class for integer-type classes, providing defaults and protocol.
+
+  Integer-type classes have the following attributes and methods.
+
+  preamble              Some code to be emitted to a header file to make use
+                        of the type.  Defaults to the empty string.
+
+  typedef_prefix        A prefix to be written before the type's name in a
+                        `typedef' declaration, possibly to muffle warnings.
+                        Defaults to the empty string.
+
+  literalfmt            A Python `%' format string for generating literal
+                        values of the type; used by the default `literal'
+                        method (so if you override it then you don't need to
+                        set this).  Defaults to `%su'.
+
+  literal(VALUE, [FMT]) Emit a literal value of the type, encoding VALUE.
+                        The default FMT has the form `0x%0Nx', so as to emit
+                        in hex with the appropriate number of leading zeros.
+
+  Instances also carry additional attributes.
+
+  bits                  The width of the integer type, in bits.
+
+  rank                  An integer giving the conversion rank of the type.
+                        Higher values generally denote wider types.
+
+  litwd                 The width of a literal of the type, in characters.
+  """
    __metaclass__ = IntClass
    preamble = ''
    typedef_prefix = ''
@@ -113,7 +162,36 @@ class UIntMaxType (BasicIntType):
    preamble = "\n#include <stdint.h>\n"
  
  class TypeChoice (object):
+  """
+  A TypeChoice object selects C integer types for multiprecision integers.
+
+  It exports its decisions as attributes:
+
+  mpwbits               The width of an `mpw', in bits.
+
+  mpw                   The integer type for single-precision values.
+
+  mpd                   The integer type for double-precision values.
+
+  ti                    An object bearing raw information about the available
+                        integer types, as follows...
+
+    TYPEINFO            A list of items (TAG, BITS) describing the widths of
+                        the available types suitable for multiprecision use,
+                        in ascending order of rank.
+
+    LIMITS              A list of items (TAG, LO, HI) describing the ranges
+                        of integer types, for constructing the `mplimits'
+                        files.
+  """
+
    def __init__(me, tifile):
+    """
+    Read the definitions from TIFILE, and select types appropriately.
+
+    The TIFILE is a tiny fragment of Python code which should set `TYPEINFO'
+    and `LIMITS' in its global namespace.
+    """
  
      ## Load the captured type information.
      me.ti = TY.ModuleType('typeinfo')
@@ -165,9 +243,25 @@ class TypeChoice (object):
  ###--------------------------------------------------------------------------
  ### Outputting constant multiprecision integers.
  
+## The margin for outputting MP literals.
  MARGIN = 72
  
  def write_preamble():
+  """
+  Write a preamble for files containing multiprecision literals.
+
+  We define a number of macros for use by the rest of the code:
+
+  ZERO_MP               An `mp' initializer denoting the value zero.
+
+  POS_MP(NAME)          Constructs an `mp' initializer denoting a positive
+                        integer whose limbs were emitted with the given
+                        NAME.
+
+  NEG_MP(NAME)          Constructs an `mp' initializer denoting a negative
+                        integer whose limbs were emitted with the given
+                        NAME.
+  """
    stdout.write("""
  #include <mLib/macros.h>
  #define MP_(name, flags) \\
@@ -180,13 +274,24 @@ def write_preamble():
  """)
  
  def write_limbs(name, x):
+  """
+  Write the limbs of the value X, with the given NAME.
+  """
+
+  ## We don't need to do anything special for zero.
    if not x: return
+
+  ## Start on the limb vector.  No delimiter needed, and we shall need to
+  ## start a new line before any actual output.  We want to write the
+  ## absolute value here, because we use a signed-magnitude representation.
    stdout.write("\nstatic const mpw %s__mpw[] = {" % name)
    sep = ''
    pos = MARGIN
    if x < 0: x = -x
    mask = (1 << TC.mpwbits) - 1
  
+  ## We work from the little-end up, picking off `mpwbits' at a time.  Start
+  ## a new line if we can't fit the value on the current one.
    while x > 0:
      w, x = x & mask, x >> TC.mpwbits
      f = TC.mpw.literal(w)
@@ -198,17 +303,30 @@ def write_limbs(name, x):
      pos += len(f) + 2
      sep = ','
  
+  ## We're done.  Finish off the initializer.
    stdout.write("\n};\n")
  
  def mp_body(name, x):
+  """
+  Write the body of an `mp' object, for the value NAME.
+  """
    return "%s_MP(%s)" % (x >= 0 and "POS" or "NEG", name)
  
  ###--------------------------------------------------------------------------
  ### Mode definition machinery.
  
+## A dictionary mapping mode names to their handler functions.
  MODEMAP = {}
  
  def defmode(func):
+  """
+  Function decorator: associate the function with the name of a mode.
+
+  The mode name is taken from the function's name: a leading `m_' is stripped
+  off, if there is one.  Mode functions are invoked with the positional
+  arguments from the command and are expected to write their output to
+  stdout.
+  """
    name = func.func_name
    if name.startswith('m_'): name = name[2:]
    MODEMAP[name] = func
@@ -219,16 +337,39 @@ def defmode(func):
  
  @defmode
  def m_mptypes():
+  """
+  Write the `mptypes.h' header.
+
+  This establishes the following types.
+
+  mpw                   An integer type for single-precision values.
+
+  mpd                   An integer type for double-precision values.
+
+  And, for t being each of `w' or `d', the following constants:
+
+  MPt_BITS              The width of the type, in bits.
+
+  MPt_P2                The smallest integer k such that 2^k is not less than
+                        MPt_BITS.  (This is used for binary searches.)
+
+  MPt_MAX               The largest value which may be stored in an object of
+                        the type.
+  """
+
+  ## Write the preamble.
    write_header("mptypes", "mptypes.h")
    stdout.write("""\
  #ifndef CATACOMB_MPTYPES_H
  #define CATACOMB_MPTYPES_H
  """)
  
+  ## Write any additional premable for the types we've selected.
    have = set([TC.mpw, TC.mpd])
    for t in have:
      stdout.write(t.preamble)
  
+  ## Emit the types and constants.
    for label, t, bits in [('mpw', TC.mpw, TC.mpwbits),
                           ('mpd', TC.mpd, TC.mpwbits*2)]:
      LABEL = label.upper()
@@ -240,6 +381,7 @@ def m_mptypes():
      stdout.write("#define %s_MAX %s\n" % (LABEL,
                                            t.literal((1 << bits) - 1, "%d")))
  
+  ## Done.
    stdout.write("\n#endif\n")
  
  ###--------------------------------------------------------------------------
@@ -247,9 +389,21 @@ def m_mptypes():
  
  @defmode
  def m_mplimits_c():
+  """
+  Write the `mplimits.c' source file.
+
+  This contains `mp' constants corresponding to the various integer types'
+  upper and lower bounds.  The output is a vector `mp_limits' consisting of
+  the distinct nonzero bounds, in order of their first occurrence in the
+  `ti.LIMITS' list.
+  """
+
+  ## Write the preamble.
    write_header("mplimits_c", "mplimits.c")
    stdout.write('#include "mplimits.h"\n')
    write_preamble()
+
+  ## Write out limbs for limits as we come across them.
    seen = {}
    v = []
    def write(x):
@@ -261,6 +415,7 @@ def m_mplimits_c():
      write(lo)
      write(hi)
  
+  ## Write the main vector.
    stdout.write("\nmp mp_limits[] = {")
    i = 0
    sep = "\n  "
@@ -272,6 +427,14 @@ def m_mplimits_c():
  
  @defmode
  def m_mplimits_h():
+  """
+  Write the `mplimits.h' source file.
+
+  For each type TAG, this defines constants MP_TAG_MIN and MP_TAG_MAX
+  representing the lower and upper bounds of the type.
+  """
+
+  ## Write the preamble.
    write_header("mplimits_h", "mplimits.h")
    stdout.write("""\
  #ifndef CATACOMB_MPLIMITS_H
@@ -285,6 +448,8 @@ extern mp mp_limits[];
  
  """)
  
+  ## Now define constants for the bounds.  Things which are zero can go to
+  ## our existing `MP_ZERO'; otherwise we index the `mp_limits' vector.
    seen = { 0: "MP_ZERO" }
    slot = [0]
    def find(x):
@@ -298,12 +463,19 @@ extern mp mp_limits[];
      stdout.write("#define MP_%s_MIN %s\n" % (tag, find(lo)))
      stdout.write("#define MP_%s_MAX %s\n" % (tag, find(hi)))
  
+  ## All done.
    stdout.write("\n#endif\n")
  
  ###--------------------------------------------------------------------------
  ### Group tables.
  
  class GroupTableClass (type):
+  """
+  Metaclass for group tables, which registers them in the `MODEMAP'.
+
+  Such a class must define an attribute `mode' giving the mode name, and a
+  class method `run' which writes the necessary output.
+  """
    def __new__(cls, name, supers, dict):
      c = type.__new__(cls, name, supers, dict)
      try: mode = c.mode
@@ -312,10 +484,101 @@ class GroupTableClass (type):
      return c
  
  class GroupTable (object):
+  """
+  Base class for group tables objects.
+
+  A `group table' is a table of constants, typically defining a cyclic group
+  or something similar.  We read the values from an input file, and write
+  them out as C definitions.  These have a somewhat stereotyped format, so we
+  can mostly handle them uniformly.
+
+  Specifically, input files consist of lines which are split into
+  whitespace-separated words.  Blank lines, and lines beginning with `#', are
+  ignored.  The remaining lines are gathered together into stanzas of the
+  form
+
+  KEYWORD NAME [HEAD-VALUE ...]
+        SLOT VALUE
+        ...
+
+  (Indentation is shown for clarity only.)  Such a stanza describes a group
+  NAME; some slots are assigned values from the headline, and others from
+  their own individual lines.
+
+  Subclasses must define the following attributes.
+
+  data_t                The name of the type for describing a particular
+                        group.
+
+  entry_t               The name of the type which associates a name with
+                        some group data; this will be defined as
+
+                                typedef struct ENTRY_T {
+                                  const char *name;
+                                  DATA_T *data;
+                                } ENTRY_T;
+
+                        or similar.
+
+  filename              The filename, typically `SOMETHING.c', to put in the
+                        output header.
+
+  header                The header file to include, so as to establish the
+                        necessary types and other definitions.
+
+  keyword               The keyword beginning a new definition in the input
+                        file.  The default is `group'.
+
+  mode                  The mode name, used to invoke this kind of table
+                        operation (used by GroupTableClass).
+
+  slots                 A vector of slot objects (see BaseSlot for the
+                        protocol) describing the structure of this particular
+                        kind of group, in the order they should be written in
+                        an initializer.
+
+  Instances carry an `st' attribute, which contains a `struct' object in
+  which slots can maintain some state.  This object carries the following
+  attributes maintained by this class.
+
+  d                     A dictionary mapping slots (not their names!) to
+                        values assigned in the current stanza.  This is reset
+                        at the start of each stanza.  Slot implementations
+                        a free to use this or not, and the representation is
+                        internal to the specific slot class.
+
+  mpmap                 A dictionary mapping values (integers, or `None') to
+                        C initializers (typically, actually, macro
+                        invocations).
+
+  name                  The name of the group currently being parsed.
+
+  nextmp                Index for the next `mp' object to be written.
+  """
+
+  ## Additional attributes, for internal use:
+  ##
+  ## _defs              A set of known names for groups.
+  ##
+  ## _headslots         A list of slots filled in from the headline.
+  ##
+  ## _names             A list of pairs (ALIAS, DATA) mapping alias names to
+  ##                    the actual group data.
+  ##
+  ## _slotmap           A dictionary mapping slot names to their
+  ##                    descriptions.
+
    __metaclass__ = GroupTableClass
+
+  ## Default values.
    keyword = 'group'
    slots = []
+
    def __init__(me):
+    """
+    Initialize a group table object.
+    """
+
      me.st = st = struct()
      st.nextmp = 0
      st.mpmap = { None: 'NO_MP', 0: 'ZERO_MP' }
@@ -325,10 +588,22 @@ class GroupTable (object):
      me._defs = set()
      me._slotmap = dict([(s.name, s) for s in me.slots])
      me._headslots = [s for s in me.slots if s.headline]
+
    def _flush(me):
+    """
+    Write out the data for a group once we've detected the end of its stanza.
+    """
+
+    ## If there's no current stanza, then do nothing.
      if me.st.name is None: return
+
+    ## Start emitting the object.
      stdout.write("/* --- %s --- */\n" % me.st.name)
+
+    ## Get the various slots to compute themselves.
      for s in me.slots: s.setup(me.st)
+
+    ## Write the initializer.
      stdout.write("\nstatic %s c_%s = {" % (me.data_t, fix_name(me.st.name)))
      sep = "\n  "
      for s in me.slots:
@@ -336,40 +611,78 @@ class GroupTable (object):
        s.write(me.st)
        sep = ",\n  "
      stdout.write("\n};\n\n")
+
+    ## Clear the state for the next stanza.
      me.st.d = {}
      me.st.name = None
+
    @classmethod
    def run(cls, input):
+    """
+    Main output for a group table.  Reads the file INPUT.
+    """
+
+    ## Make an object for us to work on.
      me = cls()
+
+    ## Write the output preamble.
      write_header(me.mode, me.filename)
      stdout.write('#include "%s"\n' % me.header)
      write_preamble()
      stdout.write("#define NO_MP { 0, 0, 0, 0, 0, 0 }\n\n")
+
+    ## The main group data.  This will contain a `data_t' object for each
+    ## group we read.  We'll also build the name to data map as we go.
      write_banner("Group data")
      stdout.write('\n')
      with open(input) as file:
        for line in file:
+
+        ## Parse the line into fields.
          ff = line.split()
          if not ff or ff[0].startswith('#'): continue
+
          if ff[0] == 'alias':
+          ## An alias.  Just remember this.
            if len(ff) != 3: raise Exception, "wrong number of alias arguments"
            me._flush()
            me._names.append((ff[1], ff[2]))
+
          elif ff[0] == me.keyword:
+          ## A headline for a new group.
+
+          ## Check the headline syntax.  Headline slots may be set here, or
+          ## later by name.
            if len(ff) < 2 or len(ff) > 2 + len(me._headslots):
              raise Exception, "bad number of headline arguments"
+
+          ## Flush out the previous stanza.
            me._flush()
+
+          ## Remember the new stanza's name, and add it to the list.
            me.st.name = name = ff[1]
            me._defs.add(name)
            me._names.append((name, name))
+
+          ## Set headline slots from the remaining headline words.
            for f, s in zip(ff[2:], me._headslots): s.set(me.st, f)
+
          elif ff[0] in me._slotmap:
+          ## A slot assignment.  Get the slot to store a value.
+          if me.st.name is None:
+            raise Exception, "no group currently being defined"
            if len(ff) != 2:
              raise Exception, "bad number of values for slot `%s'" % ff[0]
            me._slotmap[ff[0]].set(me.st, ff[1])
+
          else:
+          ## Something incomprehensible.
            raise Exception, "unknown keyword `%s'" % ff[0]
+
+    ## End of the input.  Write out the final stanza.
      me._flush()
+
+    ## Now for the name-to-data mapping.
      write_banner("Main table")
      stdout.write("\nconst %s %s[] = {\n" % (me.entry_t, me.tabname))
      for a, n in me._names:
@@ -377,46 +690,139 @@ class GroupTable (object):
          raise Exception, "alias `%s' refers to unknown group `%s'" % (a, n)
        stdout.write('  { "%s", &c_%s },\n' % (a, fix_name(n)))
      stdout.write("  { 0, 0 }\n};\n\n")
+
+    ## We're done.
      write_banner("That's all, folks")
  
  class BaseSlot (object):
+  """
+  Base class for slot types.
+
+  The slot protocol works as follows.  Throughout, ST is a state object as
+  maintained by a GroupTable.
+
+  __init__(NAME, [HEADLINE], [OMITP], [ALLOWP], ...)
+                        Initialize the slot.  The NAME identifies the slot,
+                        and the keyword used to set it in input files.  If
+                        HEADLINE is true then the slot can be set from the
+                        stanza headline.  OMITP and ALLOWP are optional
+                        functions: if OMITP(ST) returns true then the slot
+                        may be omitted; conversely, if ALLOWP(ST, VALUE)
+                        returns false then the slot cannot be assigned the
+                        given VALUE.  Other arguments may be allowed by
+                        specific slot types.
+
+  set(ST, VALUE)        Set the slot to the given VALUE, typically by setting
+                        ST.d[me].  The default just stores the VALUE without
+                        interpreting it.
+
+  setup(ST)             Prepare the slot for output.  The default method just
+                        checks that ST.d contains a mapping for the slot.
+                        All of the stanza's slots are set up before starting
+                        on the initializer for the group data, so slots can
+                        use this opportunity to emit preparatory definitions.
+
+  write(ST)             Write an initializer for the slot to standard
+                        output.  There is no default.
+
+  The following attributes are exported.
+
+  headline              A flag: can the slot be initialized from the stanza
+                        headline?
+
+  name                  The slot's name.
+  """
+
    def __init__(me, name, headline = False, omitp = None, allowp = None):
+    """
+    Initialize a new slot object, setting the necessary attributes.
+    """
      me.name = name
      me.headline = headline
      me._omitp = omitp
      me._allowp = allowp
+
    def set(me, st, value):
+    """
+    Store a VALUE for the slot.
+    """
      if me._allowp and not me._allowp(st, value):
        raise Exception, "slot `%s' not allowed here" % me.name
      st.d[me] = value
+
    def setup(me, st):
+    """
+    Prepare the slot for output, checking its value and so on.
+    """
      if me not in st.d and (not me._omitp or not me._omitp(st)):
        raise Exception, "missing slot `%s'" % me.name
  
  class EnumSlot (BaseSlot):
+  """
+  An EnumSlot object represents a slot which can contain one of a number of
+  named values.
+
+  An omitted value is written as a literal `0'.
+  """
+
    def __init__(me, name, prefix, values, **kw):
+    """
+    Initialize an EnumSlot object.
+
+    The VALUES are a set of value names.  On output, a value is converted to
+    uppercase, and prefixed by the PREFIX and an underscore.
+    """
      super(EnumSlot, me).__init__(name, **kw)
      me._values = set(values)
      me._prefix = prefix
+
    def set(me, st, value):
+    """
+    Check that the VALUE is one of the ones we know.
+    """
      if value not in me._values:
        raise Exception, "invalid %s value `%s'" % (me.name, value)
      super(EnumSlot, me).set(st, value)
+
    def write(me, st):
+    """
+    Convert the slot value to the C constant name.
+    """
      try: stdout.write('%s_%s' % (me._prefix, st.d[me].upper()))
      except KeyError: stdout.write('0')
  
  class MPSlot (BaseSlot):
+  """
+  An MPSlot object represents a slot which can contain a multiprecision
+  integer.
+
+  An omitted value is written as a invalid `mp' object.
+  """
+
    def set(me, st, value):
+    """
+    Set a value; convert it to a Python integer.
+    """
      super(MPSlot, me).set(st, long(value, 0))
+
    def setup(me, st):
+    """
+    Prepare to write the slot.
+
+    If this is a new integer, then write out a limb vector.  Names for the
+    limbs are generated unimaginitively, using a counter.
+    """
      super(MPSlot, me).setup(st)
      v = st.d.get(me)
      if v not in st.mpmap:
        write_limbs('v%d' % st.nextmp, v)
        st.mpmap[v] = mp_body('v%d' % st.nextmp, v)
        st.nextmp += 1
+
    def write(me, st):
+    """
+    Write out an `mp' initializer for the slot.
+    """
      stdout.write(st.mpmap[st.d.get(me)])
  
  class BinaryGroupTable (GroupTable):
@@ -473,11 +879,14 @@ for shortopt, longopt, kw in [
  op.set_defaults(typeinfo = './typeinfo.py')
  opts, args = op.parse_args()
  
+## Parse the positional arguments.
  if len(args) < 1: op.error('missing MODE')
  mode = args[0]
  
+## Establish the choice of low-level C types.
  TC = TypeChoice(opts.typeinfo)
  
+## Find the selected mode, and invoke the appropriate handler.
  try: modefunc = MODEMAP[mode]
  except KeyError: op.error("unknown mode `%s'" % mode)
  modefunc(*args[1:])
diff --git a/symm/multigen b/symm/multigen

index 20024bc..f498ce2 100755 (executable)
--- a/symm/multigen
+++ b/symm/multigen
@@ -36,45 +36,113 @@ from sys import argv, exit, stderr
  ###--------------------------------------------------------------------------
  ### Utilities.
  
-QUIS = OS.path.basename(argv[0])
+QUIS = OS.path.basename(argv[0])        # Program name, for use in errors.
  
  def die(msg):
+  """Report MSG as a fatal error, and exit."""
    stderr.write('%s: %s\n' % (QUIS, msg))
    exit(1)
  
  def indexed(seq):
+  """
+  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.
+  """
    return IT.izip(IT.count(), seq)
  
  ###--------------------------------------------------------------------------
  ### Reading the input values.
  
+## Map column names to (Relation, # index) pairs.
  COLMAP = {}
  
  class Cursor (object):
+  """
+  A Cursor object keeps track of an iteration through a Relation.
+
+  At any time, the Cursor has a `current' row; the individual cells of this
+  row may be retrieved using Python's standard indexing operator.  The `step'
+  method advances to the next row (if there is one).  The `reset' method
+  returns to row zero.
+  """
+
    def __init__(me, rel):
+    """
+    Initialize a new Cursor object, tracking its way through a Relation REL.
+
+    The new Cursor has row zero as its current row.  The REL must not be
+    empty.
+    """
      me._rel = rel
      me.reset()
+
    def step(me):
+    """
+    Advance the Cursor to the next row.
+
+    Returns False if there is no next row; otherwise True.
+    """
      me._i += 1
      if me._i >= len(me._rel):
        me._i = me._row = None
        return False
      me._row = me._rel[me._i]
      return True
+
    def reset(me):
+    """
+    Reset the Cursor, so that row zero is current again.
+    """
      me._i = 0
      me._row = me._rel[0]
+
    def __getitem__(me, i):
+    """
+    Return the item in column I of the Cursor's current row.
+
+    The index must be acceptable to the underlying row object, but otherwise
+    the Cursor imposes no restrictions.  Indices need not be numeric, for
+    example.
+    """
      return me._row[i]
+
    def __repr__(me):
+    """
+    Return a text description of the Cursor, for diagnostic use.
+    """
      return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
  
  class CursorSet (object):
+  """
+  A CursorSet iterates over the cartiesian product of a number of Relations.
+
+  More precisely: it maintains a stack, each level of which tracks a number
+  of Relations.  More Relations can be pushed onto this stack with the `push'
+  method, and removed with `pop'.  The `step' method advances through the
+  cartesian product of the Relations in the top level of the stack -- the
+  `active' Relations.  Columns from the current rows of all of the currently
+  known Relations -- whether active or not -- can be extracted using `get'.
+  """
+
    def __init__(me):
+    """
+    Initialize a new CursorSet object.
+
+    A new CursorSet has an empty stack.
+    """
      me._map = {}
      me._stack = []
      me._act = None
+
    def push(me, rels):
+    """
+    Push the new Relations RELS onto the stack and start iterating.
+
+    The currently active Relations are pushed down.  Those Relations which are
+    not already known to the CursorSet become the newly active collection.
+    (Relations which are already known are simply ignored.)
+
+    Iteration traverses Relations on the right more rapidly.
+    """
      cc = []
      rr = []
      for r in rels:
@@ -84,7 +152,14 @@ class CursorSet (object):
        cc.append(c)
      me._stack.append((me._act, rr))
      me._act = cc
+
    def step(me):
+    """
+    Advance the CursorSet through the currently active Relations.
+
+    Return False if the active Relations have now been exhausted; otherwise
+    return True.
+    """
      i = 0
      while i < len(me._act):
        if me._act[i].step(): return True
@@ -92,35 +167,90 @@ class CursorSet (object):
        me._act[i].reset()
        i += 1
      return False
+
    def pop(me):
+    """
+    Pop the active Relations.
+
+    Return to iterating over the previously active collection.
+    """
      me._act, rels = me._stack.pop()
      for r in rels: del me._map[r]
+
    def get(me, rel, i):
+    """
+    Return the item with index I in the current row of Relation REL.
+    """
      return me._map[rel][i]
  
  class Relation (object):
+  """
+  A Relation keeps track of a table of data.
+
+  A Relation consists of a `header', which is a sequence of string names,
+  and a rectangular array of data, each row of which has the same number of
+  items as the header.
+
+  Relations can be iterated over using Cursors and CursorSets.
+  """
+
    def __init__(me, head):
+    """
+    Initialize a new, empty Relation with header HEAD.
+
+    The `COLMAP' dictionary is updated to map the names in the header to this
+    Relation and its column indices.
+    """
      me._head = head
      me._rows = []
      for i, c in indexed(head): COLMAP[c] = me, i
+
    def addrow(me, row):
+    """
+    Add a ROW to the Relation.
+
+    The new row must have the correct number of entries.
+    """
      if len(row) != len(me._head):
        die("mismatch: row `%s' doesn't match heading `%s'" %
            (', '.join(row), ', '.join(me._head)))
      me._rows.append(row)
+
    def __len__(me):
+    """Return the number of rows in the Relation."""
      return len(me._rows)
+
    def __getitem__(me, i):
+    """Return the Ith row of the Relation."""
      return me._rows[i]
+
    def __repr__(me):
+    """Return a textual description of the Relation, for diagnostic use."""
      return '#<Relation %r>' % me._head
  
  def read_immediate(word):
+  """
+  Return a Relation constructed by parsing WORD.
+
+  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
+  comma-separated lists of strings which will form the relation's header and
+  rows respectively.  There is no way to include an item which contains a
+  comma or whitespace.
+  """
    head, rels = word.split('=', 1)
    rel = Relation([c.strip() for c in head.split(',')])
    for row in rels.split(): rel.addrow([c.strip() for c in row.split(',')])
  
  def read_file(spec):
+  """
+  Return a Relation constructed from a file, according to SPEC.
+
+  The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
+  comma-separated list of strings to form the relation's header.  Each line
+  from the file which is neither empty nor begins with `#' is split into
+  whitespace-separated words to form a row in the relation.  There is no way
+  to include an item which contains whitespace.
+  """
    file, head = spec.split(':', 1)
    rel = Relation([c.strip() for c in head.split(',')])
    with open(file) as f:
@@ -130,6 +260,13 @@ def read_file(spec):
        rel.addrow(line.split())
  
  def read_thing(spec):
+  """
+  Return a relation constructed from SPEC.
+
+  If SPEC begins with `@' then read the relation from a file (see
+  `read_file'); otherwise interpret it as immediate data (see
+  `read_immediate').
+  """
    if spec.startswith('@'): read_file(spec[1:])
    else: read_immediate(spec)
  
@@ -137,42 +274,104 @@ def read_thing(spec):
  ### Template structure.
  
  class BasicTemplate (object):
+  """
+  Base class for template objects.
+
+  The protocol for templates consists of two methods:
+
+  relations()           Return a set of Relations mentioned at top-level in
+                        substitutions in the template.
+
+  subst(OUT, CS)        Fill in the template, writing the output to the
+                        stream OUT.  The CS is a CursorSet object tracking
+                        the current iteration state.
+  """
    pass
  
  class LiteralTemplate (BasicTemplate):
+  """
+  A LiteralTemplate outputs a fixed string.
+  """
+
    def __init__(me, text, **kw):
+    """
+    Initialize a new LiteralTemplate object.  TEXT is the text to be written.
+    """
      super(LiteralTemplate, me).__init__(**kw)
      me._text = text
+
    def relations(me):
+    """A LiteralTemplate contains no substitutions."""
      return set()
+
    def subst(me, out, cs):
+    """A LiteralTemplate just emits its text."""
      out.write(me._text)
+
    def __repr__(me):
      return '#<LiteralTemplate %r>' % me._text
  
  class TagTemplate (BasicTemplate):
+  """
+  A TagTemplate object expands a substitution tag.
+
+  It extracts an item from the current row of a relation, processes it
+  according to an operation, and outputs the result.
+  """
+
    def __init__(me, rel, i, op, **kw):
+    """
+    Initialize a new TagTemplate object.
+
+    REL is the relation from which to pick the output; I is the column index;
+    OP is a transformation to apply to the data, and may be None to indicate
+    that the data should not be transformed.
+    """
      super(TagTemplate, me).__init__(**kw)
      me._rel = rel
      me._i = i
      me._op = op
+
    def relations(me):
+    """The TagTemplate knows which relation it uses."""
      return set([me._rel])
+
    def subst(me, out, cs):
+    """
+    A TagTemplate extracts and transforms an item from the current row of
+    a relation.
+    """
      val = cs.get(me._rel, me._i)
      if me._op is not None: val = me._op(val)
      out.write(val)
+
    def __repr__(me):
      return '#<TagTemplate %s>' % me._rel._head[me._i]
  
  class SequenceTemplate (BasicTemplate):
+  """
+  A SequenceTemplate concatenates a number of other templates.
+  """
+
    def __new__(cls, seq, **kw):
+    """
+    Construct a template from a sequence SEQ of other templates.
+
+    If SEQ is a singleton (which it often is) then return it directly;
+    otherwise construct a SequenceTemplate.
+    """
      if len(seq) == 1:
        return seq[0]
      else:
        return super(SequenceTemplate, cls).__new__(cls, seq = seq, **kw)
  
    def __init__(me, seq, **kw):
+    """
+    Initialize a new SequenceTemplate object from SEQ.
+
+    The sequence is flattened out: if SEQ contains SequenceTemplates then we
+    use their children directly, so that we don't have a useless tree.
+    """
      super(SequenceTemplate, me).__init__(**kw)
      tt = []
      cls = type(me)
@@ -182,20 +381,46 @@ class SequenceTemplate (BasicTemplate):
      me._seq = tt
  
    def relations(me):
+    """
+    The relations of a SequenceTemplate are the union of the relations of its
+    children.
+    """
      rr = set()
      for t in me._seq: rr.update(t.relations())
      return rr
+
    def subst(me, out, cs):
+    """
+    The output of a SequenceTemplate is the concatenation of the expansions
+    of its children.
+    """
      for t in me._seq: t.subst(out, cs)
+
    def __repr__(me):
      return '#<SequenceTemplate %r>' % me._seq
  
  class RepeatTemplate (BasicTemplate):
+  """
+  A RepeatTemplate iterates its body over a number of relations.
+  """
+
    def __init__(me, sub):
+    """
+    Initialize a new RepeatTemplate, given a template to act as its body.
+    """
      me._sub = sub
+
    def relations(me):
+    """
+    A RepeatTemplate hides the relations of its body.
+    """
      return set()
+
    def subst(me, out, cs):
+    """
+    Substitute a RepeatTemplate, by iterating over the relations mentioned in
+    its body template.
+    """
      rr = me._sub.relations()
      for r in rr:
        if len(r) == 0: return
@@ -204,6 +429,7 @@ class RepeatTemplate (BasicTemplate):
        me._sub.subst(out, cs)
        if not cs.step(): break
      cs.pop()
+
    def __repr__(me):
      return '#<RepeatTemplate %r>' % me._sub
  
@@ -211,78 +437,174 @@ class RepeatTemplate (BasicTemplate):
  ### Some slightly cheesy parsing machinery.
  
  class ParseState (object):
+  """
+  A ParseState object keeps track of a parser's position in a file.
+
+  The `curr' slot contains the current line under consideration.
+  """
+
    def __init__(me, file, text):
+    """
+    Initialize a ParseState object.
+
+    The FILE is a string naming the source file, and the TEXT is an iterator
+    over the file's lines.
+    """
      me._file = file
      me._i = 0
      me._it = iter(text.splitlines(True))
      me.step()
+
    def step(me):
+    """
+    Advance the ParseState to the next line.
+
+    Sets `curr' to the next line, or to None if the input is exhausted.
+    """
      try: me.curr = me._it.next()
      except StopIteration: me.curr = None
      else: me._i += 1
+
    def error(me, msg):
+    """
+    Report a fatal error during parsing, attributing it to the current line.
+    """
      die('%s:%d: %s' % (me._file, me._i, msg))
  
  class token (object):
+  """
+  A token object has no interesting properties other than its identity.
+  """
+
    def __init__(me, name):
+    """Initialize a new token, with the given NAME."""
      me._name = name
    def __repr__(me):
+    """Return a description of the token, for diagnostic purposes."""
      return '#<%s>' % me._name
  
+## Some magical tokens useful during parsing.
  EOF = token('eof')
  END = token('end')
  
+## Regular expressions matching substitution tags.
  R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
  R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
  
+## A dictionary mapping operation names to functions which implement them.
  OPMAP = {}
  
  def defop(func):
+  """
+  Decorator for substitution operator functions.
+
+  Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
+  name, removing a prefix `op_' if there is one.
+
+  An operator function is given the raw value as an argument and should
+  return the transformed value.
+  """
    name = func.func_name
    if name.startswith('op_'): name = name[3:]
    OPMAP[name] = func
    return func
  
  @defop
-def op_u(val): return val.upper()
+def op_u(val):
+  """@{COLUMN:u} -- the item in upper case."""
+  return val.upper()
  
  @defop
-def op_l(val): return val.lower()
+def op_l(val):
+  """@{COLUMN:l} -- the item in upper case."""
+  return val.lower()
  
  R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
  @defop
-def op_c(val): return R_NOTIDENT.sub('_', val)
+def op_c(val):
+  """
+  @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
+  """
+  return R_NOTIDENT.sub('_', val)
  
  def _pairify(val):
+  """
+  Split VAL into two, at an `=' sign.
+
+  If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
+  otherwise return (VAL, VAL).
+  """
    c = val.find('=')
    if c >= 0: return val[:c], val[c + 1:]
    else: return val, val
  
  @defop
-def op_left(val): return _pairify(val)[0]
+def op_left(val):
+  """@{COLUMN:left} -- the left-hand side of the item."""
+  return _pairify(val)[0]
  @defop
-def op_right(val): return _pairify(val)[1]
+def op_right(val):
+  """@{COLUMN:right} -- the left-hand side of the item."""
+  return _pairify(val)[1]
  
  def parse_text(ps):
+  """
+  Parse a chunk of text from a ParseState.
+
+  Stop when we get to something which looks like a template keyword, but
+  extract tags.  Return the resulting template.
+
+  Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'.  The text may
+  contain comments beginning `%#', which are ignored, and lines beginning
+  `%%' which have the initial `%' removed and are otherwise treated as normal
+  text (and, in particular, may contain tags).  Other lines beginning with
+  `%' are directives and must be processed by our caller.
+  """
+
+  ## Starting out: no templates collected, and an empty buffer of literal
+  ## text.
    tt = []
    lit = StringIO()
+
    def spill():
+    ## Spill accumulated literal text from `lit' into a LiteralTemplate
+    ## object.
      l = lit.getvalue()
      if l: tt.append(LiteralTemplate(l))
      lit.reset()
      lit.truncate()
+
+  ## Iterate over the lines of input.
    while True:
      line = ps.curr
+
+    ## Stop if there's no more text; handle lines beginning with `%'.
      if line is None: break
      elif line.startswith('%'):
        if line.startswith('%#'): ps.step(); continue
        elif line.startswith('%%'): line = line[1:]
        else: break
+
+    ## Work through the line, finding tags.
      i = 0
      while True:
+
+      ## If there are no more `@' signs, there can be no more tags, and we're
+      ## done.
        j = line.find('@', i)
        if j < 0: break
+
+      ## Write the chunk we've found.
        lit.write(line[i:j])
+
+      ## If the next character is also `@' then this is an escape and we
+      ## should carry on.
+      if line[j:].startswith('@@'):
+        lit.write('@')
+        i = j + 2
+        continue
+
+      ## Parse the tag into a column name, and maybe some operators.
        m = R_SIMPLETAG.match(line, j)
        if not m: m = R_COMPLEXTAG.match(line, j)
        if not m: ps.error('invalid tag')
@@ -290,6 +612,8 @@ def parse_text(ps):
        try: rel, i = COLMAP[col]
        except KeyError: ps.error("unknown column `%s'" % col)
        ops = m.lastindex >= 2 and m.group(2)
+
+      ## If we have operators then look them up and compose them.
        wholeop = None
        if ops:
          for opname in ops[1:].split(':'):
@@ -297,24 +621,60 @@ def parse_text(ps):
            except KeyError: ps.error("unknown operation `%s'" % opname)
            if wholeop is None: wholeop = op
            else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
+
+      ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
+      ## for the tag.
        spill()
        tt.append(TagTemplate(rel, i, wholeop))
+
+      ## Continue from after the tag.
        i = m.end()
+
+    ## Finished a line.  Write out the remainder of the line and move onto
+    ## the next.
      lit.write(line[i:])
      ps.step()
+
+  ## Run out of things to do.  Flush out the rest of the literal text and
+  ## combine the templates.
    spill()
    return SequenceTemplate(tt)
  
+## A dictionary mapping regular expressions to directive-processing functions.
  DIRECT = []
  
  def direct(rx):
+  """
+  Function decorator for template file directives.
+
+  Associate the regular expression RX with the function in `DIRECT'.
+  Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
+  and M is the match object resulting from matching RX against the directive
+  text.
+  """
    def _(func):
      DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
      return func
    return _
  
  def parse_template(ps):
+  """
+  Parse a single template from the ParseState PS.
+
+  A single template is either a chunk of text (parsed by `parse_text') or a
+  directive (handled by the appropriate function in `DIRECT').
+
+  Returns either a template object, or a special token.  In particular, `EOF'
+  is returned if we run out of text; directives may return other tokens.
+  """
+
+  ## Skip initial comments.  Otherwise we might end up with an empty
+  ## SequenceTemplate here.
    while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
+
+  ## If we've run out of input, return `EOF' here.  A line beginning `%%', or
+  ## not beginning `%', means we've found a chunk of text.  Otherwise find
+  ## the right directive handler.
    if ps.curr is None: return EOF
    elif ps.curr.startswith('%'):
      if ps.curr.startswith('%%'): return parse_text(ps)
@@ -329,6 +689,16 @@ def parse_template(ps):
      return parse_text(ps)
  
  def parse_templseq(ps, nestp):
+  """
+  Parse a sequence of templates from the ParseState PS.
+
+  Calls `parse_template' repeatedly  If NESTP is true, then an `END' token
+  (presumably from a directive handler) is permitted and halts parsing;
+  otherwise `END' signifies an error.
+
+  Returns a template object.
+  """
+
    tt = []
    while True:
      t = parse_template(ps)
@@ -343,13 +713,25 @@ def parse_templseq(ps, nestp):
  
  @direct(r'repeat')
  def dir_repeat(ps, m):
+  """
+  %repeat
+  BODY
+  %end
+
+  Iterate the body over the cartesian product of the relations mentioned
+  within.
+  """
    return RepeatTemplate(parse_templseq(ps, True))
  
  @direct(r'end')
  def dir_end(ps, m):
+  """%end -- an end marker used to delimet chunks of template."""
    return END
  
  def compile_template(file, text):
+  """
+  Compile TEXT into a template, attributing errors to FILE.
+  """
    ps = ParseState(file, text)
    t = parse_templseq(ps, False)
    return t
@@ -378,6 +760,9 @@ for rel in args[1:]: read_thing(rel)
  filetempl = compile_template('<output>', filepat)
  
  def filenames(filetempl):
+  """
+  Generate the filenames in the compiled filename template FILETEMPL.
+  """
    cs = CursorSet()
    rr = filetempl.relations()
    for r in rr:
@@ -390,6 +775,7 @@ def filenames(filetempl):
      if not cs.step(): break
    cs.pop()
  
+## Main dispatch.
  if opts.mode == 'list':
    for file, cs in filenames(filetempl): print file
  elif opts.mode == 'gen':
author	Mark Wooding <mdw@distorted.org.uk>
	Wed, 14 May 2014 20:35:40 +0000 (21:35 +0100)
committer	Mark Wooding <mdw@distorted.org.uk>
	Wed, 14 May 2014 20:36:18 +0000 (21:36 +0100)
math/mpgen		patch \| blob \| blame \| history
symm/multigen		patch \| blob \| blame \| history