### Random utilities.
def write_header(mode, name):
+ """
+ Write a C-language file header.
+
+ The header comment identifies the processing MODE, and the NAME of the
+ output file.
+ """
stdout.write("""\
/* -*-c-*- GENERATED by mpgen (%s)
*
""" % (mode, name))
def write_banner(text):
+ """Write a separator banner to the output, with header TEXT."""
stdout.write("/*----- %s %s*/\n" % (text, '-' * (66 - len(text))))
-class struct (object): pass
+class struct (object):
+ """
+ A struct object exists so that you can set attributes on it.
+ """
+ pass
R_IDBAD = RX.compile('[^0-9A-Za-z]')
-def fix_name(name): return R_IDBAD.sub('_', name)
+def fix_name(name):
+ """Replace non-alphanumeric characters in NAME with underscores."""
+ return R_IDBAD.sub('_', name)
###--------------------------------------------------------------------------
### Determining the appropriate types.
+## A dictionary mapping type tags to subclasses of BasicIntType.
TYPEMAP = {}
class IntClass (type):
+ """
+ The IntClass is a metaclass for integer-type classes.
+
+ It associates the type class with its tag in the `TYPEMAP' dictionary.
+ """
def __new__(cls, name, supers, dict):
c = type.__new__(cls, name, supers, dict)
try: TYPEMAP[c.tag] = c
return c
class BasicIntType (object):
+ """
+ A base class for integer-type classes, providing defaults and protocol.
+
+ Integer-type classes have the following attributes and methods.
+
+ preamble Some code to be emitted to a header file to make use
+ of the type. Defaults to the empty string.
+
+ typedef_prefix A prefix to be written before the type's name in a
+ `typedef' declaration, possibly to muffle warnings.
+ Defaults to the empty string.
+
+ literalfmt A Python `%' format string for generating literal
+ values of the type; used by the default `literal'
+ method (so if you override it then you don't need to
+ set this). Defaults to `%su'.
+
+ literal(VALUE, [FMT]) Emit a literal value of the type, encoding VALUE.
+ The default FMT has the form `0x%0Nx', so as to emit
+ in hex with the appropriate number of leading zeros.
+
+ Instances also carry additional attributes.
+
+ bits The width of the integer type, in bits.
+
+ rank An integer giving the conversion rank of the type.
+ Higher values generally denote wider types.
+
+ litwd The width of a literal of the type, in characters.
+ """
__metaclass__ = IntClass
preamble = ''
typedef_prefix = ''
preamble = "\n#include <stdint.h>\n"
class TypeChoice (object):
+ """
+ A TypeChoice object selects C integer types for multiprecision integers.
+
+ It exports its decisions as attributes:
+
+ mpwbits The width of an `mpw', in bits.
+
+ mpw The integer type for single-precision values.
+
+ mpd The integer type for double-precision values.
+
+ ti An object bearing raw information about the available
+ integer types, as follows...
+
+ TYPEINFO A list of items (TAG, BITS) describing the widths of
+ the available types suitable for multiprecision use,
+ in ascending order of rank.
+
+ LIMITS A list of items (TAG, LO, HI) describing the ranges
+ of integer types, for constructing the `mplimits'
+ files.
+ """
+
def __init__(me, tifile):
+ """
+ Read the definitions from TIFILE, and select types appropriately.
+
+ The TIFILE is a tiny fragment of Python code which should set `TYPEINFO'
+ and `LIMITS' in its global namespace.
+ """
## Load the captured type information.
me.ti = TY.ModuleType('typeinfo')
###--------------------------------------------------------------------------
### Outputting constant multiprecision integers.
+## The margin for outputting MP literals.
MARGIN = 72
def write_preamble():
+ """
+ Write a preamble for files containing multiprecision literals.
+
+ We define a number of macros for use by the rest of the code:
+
+ ZERO_MP An `mp' initializer denoting the value zero.
+
+ POS_MP(NAME) Constructs an `mp' initializer denoting a positive
+ integer whose limbs were emitted with the given
+ NAME.
+
+ NEG_MP(NAME) Constructs an `mp' initializer denoting a negative
+ integer whose limbs were emitted with the given
+ NAME.
+ """
stdout.write("""
#include <mLib/macros.h>
#define MP_(name, flags) \\
""")
def write_limbs(name, x):
+ """
+ Write the limbs of the value X, with the given NAME.
+ """
+
+ ## We don't need to do anything special for zero.
if not x: return
+
+ ## Start on the limb vector. No delimiter needed, and we shall need to
+ ## start a new line before any actual output. We want to write the
+ ## absolute value here, because we use a signed-magnitude representation.
stdout.write("\nstatic const mpw %s__mpw[] = {" % name)
sep = ''
pos = MARGIN
if x < 0: x = -x
mask = (1 << TC.mpwbits) - 1
+ ## We work from the little-end up, picking off `mpwbits' at a time. Start
+ ## a new line if we can't fit the value on the current one.
while x > 0:
w, x = x & mask, x >> TC.mpwbits
f = TC.mpw.literal(w)
pos += len(f) + 2
sep = ','
+ ## We're done. Finish off the initializer.
stdout.write("\n};\n")
def mp_body(name, x):
+ """
+ Write the body of an `mp' object, for the value NAME.
+ """
return "%s_MP(%s)" % (x >= 0 and "POS" or "NEG", name)
###--------------------------------------------------------------------------
### Mode definition machinery.
+## A dictionary mapping mode names to their handler functions.
MODEMAP = {}
def defmode(func):
+ """
+ Function decorator: associate the function with the name of a mode.
+
+ The mode name is taken from the function's name: a leading `m_' is stripped
+ off, if there is one. Mode functions are invoked with the positional
+ arguments from the command and are expected to write their output to
+ stdout.
+ """
name = func.func_name
if name.startswith('m_'): name = name[2:]
MODEMAP[name] = func
@defmode
def m_mptypes():
+ """
+ Write the `mptypes.h' header.
+
+ This establishes the following types.
+
+ mpw An integer type for single-precision values.
+
+ mpd An integer type for double-precision values.
+
+ And, for t being each of `w' or `d', the following constants:
+
+ MPt_BITS The width of the type, in bits.
+
+ MPt_P2 The smallest integer k such that 2^k is not less than
+ MPt_BITS. (This is used for binary searches.)
+
+ MPt_MAX The largest value which may be stored in an object of
+ the type.
+ """
+
+ ## Write the preamble.
write_header("mptypes", "mptypes.h")
stdout.write("""\
#ifndef CATACOMB_MPTYPES_H
#define CATACOMB_MPTYPES_H
""")
+ ## Write any additional premable for the types we've selected.
have = set([TC.mpw, TC.mpd])
for t in have:
stdout.write(t.preamble)
+ ## Emit the types and constants.
for label, t, bits in [('mpw', TC.mpw, TC.mpwbits),
('mpd', TC.mpd, TC.mpwbits*2)]:
LABEL = label.upper()
stdout.write("#define %s_MAX %s\n" % (LABEL,
t.literal((1 << bits) - 1, "%d")))
+ ## Done.
stdout.write("\n#endif\n")
###--------------------------------------------------------------------------
@defmode
def m_mplimits_c():
+ """
+ Write the `mplimits.c' source file.
+
+ This contains `mp' constants corresponding to the various integer types'
+ upper and lower bounds. The output is a vector `mp_limits' consisting of
+ the distinct nonzero bounds, in order of their first occurrence in the
+ `ti.LIMITS' list.
+ """
+
+ ## Write the preamble.
write_header("mplimits_c", "mplimits.c")
stdout.write('#include "mplimits.h"\n')
write_preamble()
+
+ ## Write out limbs for limits as we come across them.
seen = {}
v = []
def write(x):
write(lo)
write(hi)
+ ## Write the main vector.
stdout.write("\nmp mp_limits[] = {")
i = 0
sep = "\n "
@defmode
def m_mplimits_h():
+ """
+ Write the `mplimits.h' source file.
+
+ For each type TAG, this defines constants MP_TAG_MIN and MP_TAG_MAX
+ representing the lower and upper bounds of the type.
+ """
+
+ ## Write the preamble.
write_header("mplimits_h", "mplimits.h")
stdout.write("""\
#ifndef CATACOMB_MPLIMITS_H
""")
+ ## Now define constants for the bounds. Things which are zero can go to
+ ## our existing `MP_ZERO'; otherwise we index the `mp_limits' vector.
seen = { 0: "MP_ZERO" }
slot = [0]
def find(x):
stdout.write("#define MP_%s_MIN %s\n" % (tag, find(lo)))
stdout.write("#define MP_%s_MAX %s\n" % (tag, find(hi)))
+ ## All done.
stdout.write("\n#endif\n")
###--------------------------------------------------------------------------
### Group tables.
class GroupTableClass (type):
+ """
+ Metaclass for group tables, which registers them in the `MODEMAP'.
+
+ Such a class must define an attribute `mode' giving the mode name, and a
+ class method `run' which writes the necessary output.
+ """
def __new__(cls, name, supers, dict):
c = type.__new__(cls, name, supers, dict)
try: mode = c.mode
return c
class GroupTable (object):
+ """
+ Base class for group tables objects.
+
+ A `group table' is a table of constants, typically defining a cyclic group
+ or something similar. We read the values from an input file, and write
+ them out as C definitions. These have a somewhat stereotyped format, so we
+ can mostly handle them uniformly.
+
+ Specifically, input files consist of lines which are split into
+ whitespace-separated words. Blank lines, and lines beginning with `#', are
+ ignored. The remaining lines are gathered together into stanzas of the
+ form
+
+ KEYWORD NAME [HEAD-VALUE ...]
+ SLOT VALUE
+ ...
+
+ (Indentation is shown for clarity only.) Such a stanza describes a group
+ NAME; some slots are assigned values from the headline, and others from
+ their own individual lines.
+
+ Subclasses must define the following attributes.
+
+ data_t The name of the type for describing a particular
+ group.
+
+ entry_t The name of the type which associates a name with
+ some group data; this will be defined as
+
+ typedef struct ENTRY_T {
+ const char *name;
+ DATA_T *data;
+ } ENTRY_T;
+
+ or similar.
+
+ filename The filename, typically `SOMETHING.c', to put in the
+ output header.
+
+ header The header file to include, so as to establish the
+ necessary types and other definitions.
+
+ keyword The keyword beginning a new definition in the input
+ file. The default is `group'.
+
+ mode The mode name, used to invoke this kind of table
+ operation (used by GroupTableClass).
+
+ slots A vector of slot objects (see BaseSlot for the
+ protocol) describing the structure of this particular
+ kind of group, in the order they should be written in
+ an initializer.
+
+ Instances carry an `st' attribute, which contains a `struct' object in
+ which slots can maintain some state. This object carries the following
+ attributes maintained by this class.
+
+ d A dictionary mapping slots (not their names!) to
+ values assigned in the current stanza. This is reset
+ at the start of each stanza. Slot implementations
+ a free to use this or not, and the representation is
+ internal to the specific slot class.
+
+ mpmap A dictionary mapping values (integers, or `None') to
+ C initializers (typically, actually, macro
+ invocations).
+
+ name The name of the group currently being parsed.
+
+ nextmp Index for the next `mp' object to be written.
+ """
+
+ ## Additional attributes, for internal use:
+ ##
+ ## _defs A set of known names for groups.
+ ##
+ ## _headslots A list of slots filled in from the headline.
+ ##
+ ## _names A list of pairs (ALIAS, DATA) mapping alias names to
+ ## the actual group data.
+ ##
+ ## _slotmap A dictionary mapping slot names to their
+ ## descriptions.
+
__metaclass__ = GroupTableClass
+
+ ## Default values.
keyword = 'group'
slots = []
+
def __init__(me):
+ """
+ Initialize a group table object.
+ """
+
me.st = st = struct()
st.nextmp = 0
st.mpmap = { None: 'NO_MP', 0: 'ZERO_MP' }
me._defs = set()
me._slotmap = dict([(s.name, s) for s in me.slots])
me._headslots = [s for s in me.slots if s.headline]
+
def _flush(me):
+ """
+ Write out the data for a group once we've detected the end of its stanza.
+ """
+
+ ## If there's no current stanza, then do nothing.
if me.st.name is None: return
+
+ ## Start emitting the object.
stdout.write("/* --- %s --- */\n" % me.st.name)
+
+ ## Get the various slots to compute themselves.
for s in me.slots: s.setup(me.st)
+
+ ## Write the initializer.
stdout.write("\nstatic %s c_%s = {" % (me.data_t, fix_name(me.st.name)))
sep = "\n "
for s in me.slots:
s.write(me.st)
sep = ",\n "
stdout.write("\n};\n\n")
+
+ ## Clear the state for the next stanza.
me.st.d = {}
me.st.name = None
+
@classmethod
def run(cls, input):
+ """
+ Main output for a group table. Reads the file INPUT.
+ """
+
+ ## Make an object for us to work on.
me = cls()
+
+ ## Write the output preamble.
write_header(me.mode, me.filename)
stdout.write('#include "%s"\n' % me.header)
write_preamble()
stdout.write("#define NO_MP { 0, 0, 0, 0, 0, 0 }\n\n")
+
+ ## The main group data. This will contain a `data_t' object for each
+ ## group we read. We'll also build the name to data map as we go.
write_banner("Group data")
stdout.write('\n')
with open(input) as file:
for line in file:
+
+ ## Parse the line into fields.
ff = line.split()
if not ff or ff[0].startswith('#'): continue
+
if ff[0] == 'alias':
+ ## An alias. Just remember this.
if len(ff) != 3: raise Exception, "wrong number of alias arguments"
me._flush()
me._names.append((ff[1], ff[2]))
+
elif ff[0] == me.keyword:
+ ## A headline for a new group.
+
+ ## Check the headline syntax. Headline slots may be set here, or
+ ## later by name.
if len(ff) < 2 or len(ff) > 2 + len(me._headslots):
raise Exception, "bad number of headline arguments"
+
+ ## Flush out the previous stanza.
me._flush()
+
+ ## Remember the new stanza's name, and add it to the list.
me.st.name = name = ff[1]
me._defs.add(name)
me._names.append((name, name))
+
+ ## Set headline slots from the remaining headline words.
for f, s in zip(ff[2:], me._headslots): s.set(me.st, f)
+
elif ff[0] in me._slotmap:
+ ## A slot assignment. Get the slot to store a value.
+ if me.st.name is None:
+ raise Exception, "no group currently being defined"
if len(ff) != 2:
raise Exception, "bad number of values for slot `%s'" % ff[0]
me._slotmap[ff[0]].set(me.st, ff[1])
+
else:
+ ## Something incomprehensible.
raise Exception, "unknown keyword `%s'" % ff[0]
+
+ ## End of the input. Write out the final stanza.
me._flush()
+
+ ## Now for the name-to-data mapping.
write_banner("Main table")
stdout.write("\nconst %s %s[] = {\n" % (me.entry_t, me.tabname))
for a, n in me._names:
raise Exception, "alias `%s' refers to unknown group `%s'" % (a, n)
stdout.write(' { "%s", &c_%s },\n' % (a, fix_name(n)))
stdout.write(" { 0, 0 }\n};\n\n")
+
+ ## We're done.
write_banner("That's all, folks")
class BaseSlot (object):
+ """
+ Base class for slot types.
+
+ The slot protocol works as follows. Throughout, ST is a state object as
+ maintained by a GroupTable.
+
+ __init__(NAME, [HEADLINE], [OMITP], [ALLOWP], ...)
+ Initialize the slot. The NAME identifies the slot,
+ and the keyword used to set it in input files. If
+ HEADLINE is true then the slot can be set from the
+ stanza headline. OMITP and ALLOWP are optional
+ functions: if OMITP(ST) returns true then the slot
+ may be omitted; conversely, if ALLOWP(ST, VALUE)
+ returns false then the slot cannot be assigned the
+ given VALUE. Other arguments may be allowed by
+ specific slot types.
+
+ set(ST, VALUE) Set the slot to the given VALUE, typically by setting
+ ST.d[me]. The default just stores the VALUE without
+ interpreting it.
+
+ setup(ST) Prepare the slot for output. The default method just
+ checks that ST.d contains a mapping for the slot.
+ All of the stanza's slots are set up before starting
+ on the initializer for the group data, so slots can
+ use this opportunity to emit preparatory definitions.
+
+ write(ST) Write an initializer for the slot to standard
+ output. There is no default.
+
+ The following attributes are exported.
+
+ headline A flag: can the slot be initialized from the stanza
+ headline?
+
+ name The slot's name.
+ """
+
def __init__(me, name, headline = False, omitp = None, allowp = None):
+ """
+ Initialize a new slot object, setting the necessary attributes.
+ """
me.name = name
me.headline = headline
me._omitp = omitp
me._allowp = allowp
+
def set(me, st, value):
+ """
+ Store a VALUE for the slot.
+ """
if me._allowp and not me._allowp(st, value):
raise Exception, "slot `%s' not allowed here" % me.name
st.d[me] = value
+
def setup(me, st):
+ """
+ Prepare the slot for output, checking its value and so on.
+ """
if me not in st.d and (not me._omitp or not me._omitp(st)):
raise Exception, "missing slot `%s'" % me.name
class EnumSlot (BaseSlot):
+ """
+ An EnumSlot object represents a slot which can contain one of a number of
+ named values.
+
+ An omitted value is written as a literal `0'.
+ """
+
def __init__(me, name, prefix, values, **kw):
+ """
+ Initialize an EnumSlot object.
+
+ The VALUES are a set of value names. On output, a value is converted to
+ uppercase, and prefixed by the PREFIX and an underscore.
+ """
super(EnumSlot, me).__init__(name, **kw)
me._values = set(values)
me._prefix = prefix
+
def set(me, st, value):
+ """
+ Check that the VALUE is one of the ones we know.
+ """
if value not in me._values:
raise Exception, "invalid %s value `%s'" % (me.name, value)
super(EnumSlot, me).set(st, value)
+
def write(me, st):
+ """
+ Convert the slot value to the C constant name.
+ """
try: stdout.write('%s_%s' % (me._prefix, st.d[me].upper()))
except KeyError: stdout.write('0')
class MPSlot (BaseSlot):
+ """
+ An MPSlot object represents a slot which can contain a multiprecision
+ integer.
+
+ An omitted value is written as a invalid `mp' object.
+ """
+
def set(me, st, value):
+ """
+ Set a value; convert it to a Python integer.
+ """
super(MPSlot, me).set(st, long(value, 0))
+
def setup(me, st):
+ """
+ Prepare to write the slot.
+
+ If this is a new integer, then write out a limb vector. Names for the
+ limbs are generated unimaginitively, using a counter.
+ """
super(MPSlot, me).setup(st)
v = st.d.get(me)
if v not in st.mpmap:
write_limbs('v%d' % st.nextmp, v)
st.mpmap[v] = mp_body('v%d' % st.nextmp, v)
st.nextmp += 1
+
def write(me, st):
+ """
+ Write out an `mp' initializer for the slot.
+ """
stdout.write(st.mpmap[st.d.get(me)])
class BinaryGroupTable (GroupTable):
op.set_defaults(typeinfo = './typeinfo.py')
opts, args = op.parse_args()
+## Parse the positional arguments.
if len(args) < 1: op.error('missing MODE')
mode = args[0]
+## Establish the choice of low-level C types.
TC = TypeChoice(opts.typeinfo)
+## Find the selected mode, and invoke the appropriate handler.
try: modefunc = MODEMAP[mode]
except KeyError: op.error("unknown mode `%s'" % mode)
modefunc(*args[1:])
###--------------------------------------------------------------------------
### Utilities.
-QUIS = OS.path.basename(argv[0])
+QUIS = OS.path.basename(argv[0]) # Program name, for use in errors.
def die(msg):
+ """Report MSG as a fatal error, and exit."""
stderr.write('%s: %s\n' % (QUIS, msg))
exit(1)
def indexed(seq):
+ """
+ Generate pairs (I, X), where I counts from zero and X are the items of SEQ.
+ """
return IT.izip(IT.count(), seq)
###--------------------------------------------------------------------------
### Reading the input values.
+## Map column names to (Relation, # index) pairs.
COLMAP = {}
class Cursor (object):
+ """
+ A Cursor object keeps track of an iteration through a Relation.
+
+ At any time, the Cursor has a `current' row; the individual cells of this
+ row may be retrieved using Python's standard indexing operator. The `step'
+ method advances to the next row (if there is one). The `reset' method
+ returns to row zero.
+ """
+
def __init__(me, rel):
+ """
+ Initialize a new Cursor object, tracking its way through a Relation REL.
+
+ The new Cursor has row zero as its current row. The REL must not be
+ empty.
+ """
me._rel = rel
me.reset()
+
def step(me):
+ """
+ Advance the Cursor to the next row.
+
+ Returns False if there is no next row; otherwise True.
+ """
me._i += 1
if me._i >= len(me._rel):
me._i = me._row = None
return False
me._row = me._rel[me._i]
return True
+
def reset(me):
+ """
+ Reset the Cursor, so that row zero is current again.
+ """
me._i = 0
me._row = me._rel[0]
+
def __getitem__(me, i):
+ """
+ Return the item in column I of the Cursor's current row.
+
+ The index must be acceptable to the underlying row object, but otherwise
+ the Cursor imposes no restrictions. Indices need not be numeric, for
+ example.
+ """
return me._row[i]
+
def __repr__(me):
+ """
+ Return a text description of the Cursor, for diagnostic use.
+ """
return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
class CursorSet (object):
+ """
+ A CursorSet iterates over the cartiesian product of a number of Relations.
+
+ More precisely: it maintains a stack, each level of which tracks a number
+ of Relations. More Relations can be pushed onto this stack with the `push'
+ method, and removed with `pop'. The `step' method advances through the
+ cartesian product of the Relations in the top level of the stack -- the
+ `active' Relations. Columns from the current rows of all of the currently
+ known Relations -- whether active or not -- can be extracted using `get'.
+ """
+
def __init__(me):
+ """
+ Initialize a new CursorSet object.
+
+ A new CursorSet has an empty stack.
+ """
me._map = {}
me._stack = []
me._act = None
+
def push(me, rels):
+ """
+ Push the new Relations RELS onto the stack and start iterating.
+
+ The currently active Relations are pushed down. Those Relations which are
+ not already known to the CursorSet become the newly active collection.
+ (Relations which are already known are simply ignored.)
+
+ Iteration traverses Relations on the right more rapidly.
+ """
cc = []
rr = []
for r in rels:
cc.append(c)
me._stack.append((me._act, rr))
me._act = cc
+
def step(me):
+ """
+ Advance the CursorSet through the currently active Relations.
+
+ Return False if the active Relations have now been exhausted; otherwise
+ return True.
+ """
i = 0
while i < len(me._act):
if me._act[i].step(): return True
me._act[i].reset()
i += 1
return False
+
def pop(me):
+ """
+ Pop the active Relations.
+
+ Return to iterating over the previously active collection.
+ """
me._act, rels = me._stack.pop()
for r in rels: del me._map[r]
+
def get(me, rel, i):
+ """
+ Return the item with index I in the current row of Relation REL.
+ """
return me._map[rel][i]
class Relation (object):
+ """
+ A Relation keeps track of a table of data.
+
+ A Relation consists of a `header', which is a sequence of string names,
+ and a rectangular array of data, each row of which has the same number of
+ items as the header.
+
+ Relations can be iterated over using Cursors and CursorSets.
+ """
+
def __init__(me, head):
+ """
+ Initialize a new, empty Relation with header HEAD.
+
+ The `COLMAP' dictionary is updated to map the names in the header to this
+ Relation and its column indices.
+ """
me._head = head
me._rows = []
for i, c in indexed(head): COLMAP[c] = me, i
+
def addrow(me, row):
+ """
+ Add a ROW to the Relation.
+
+ The new row must have the correct number of entries.
+ """
if len(row) != len(me._head):
die("mismatch: row `%s' doesn't match heading `%s'" %
(', '.join(row), ', '.join(me._head)))
me._rows.append(row)
+
def __len__(me):
+ """Return the number of rows in the Relation."""
return len(me._rows)
+
def __getitem__(me, i):
+ """Return the Ith row of the Relation."""
return me._rows[i]
+
def __repr__(me):
+ """Return a textual description of the Relation, for diagnostic use."""
return '#<Relation %r>' % me._head
def read_immediate(word):
+ """
+ Return a Relation constructed by parsing WORD.
+
+ The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
+ comma-separated lists of strings which will form the relation's header and
+ rows respectively. There is no way to include an item which contains a
+ comma or whitespace.
+ """
head, rels = word.split('=', 1)
rel = Relation([c.strip() for c in head.split(',')])
for row in rels.split(): rel.addrow([c.strip() for c in row.split(',')])
def read_file(spec):
+ """
+ Return a Relation constructed from a file, according to SPEC.
+
+ The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
+ comma-separated list of strings to form the relation's header. Each line
+ from the file which is neither empty nor begins with `#' is split into
+ whitespace-separated words to form a row in the relation. There is no way
+ to include an item which contains whitespace.
+ """
file, head = spec.split(':', 1)
rel = Relation([c.strip() for c in head.split(',')])
with open(file) as f:
rel.addrow(line.split())
def read_thing(spec):
+ """
+ Return a relation constructed from SPEC.
+
+ If SPEC begins with `@' then read the relation from a file (see
+ `read_file'); otherwise interpret it as immediate data (see
+ `read_immediate').
+ """
if spec.startswith('@'): read_file(spec[1:])
else: read_immediate(spec)
### Template structure.
class BasicTemplate (object):
+ """
+ Base class for template objects.
+
+ The protocol for templates consists of two methods:
+
+ relations() Return a set of Relations mentioned at top-level in
+ substitutions in the template.
+
+ subst(OUT, CS) Fill in the template, writing the output to the
+ stream OUT. The CS is a CursorSet object tracking
+ the current iteration state.
+ """
pass
class LiteralTemplate (BasicTemplate):
+ """
+ A LiteralTemplate outputs a fixed string.
+ """
+
def __init__(me, text, **kw):
+ """
+ Initialize a new LiteralTemplate object. TEXT is the text to be written.
+ """
super(LiteralTemplate, me).__init__(**kw)
me._text = text
+
def relations(me):
+ """A LiteralTemplate contains no substitutions."""
return set()
+
def subst(me, out, cs):
+ """A LiteralTemplate just emits its text."""
out.write(me._text)
+
def __repr__(me):
return '#<LiteralTemplate %r>' % me._text
class TagTemplate (BasicTemplate):
+ """
+ A TagTemplate object expands a substitution tag.
+
+ It extracts an item from the current row of a relation, processes it
+ according to an operation, and outputs the result.
+ """
+
def __init__(me, rel, i, op, **kw):
+ """
+ Initialize a new TagTemplate object.
+
+ REL is the relation from which to pick the output; I is the column index;
+ OP is a transformation to apply to the data, and may be None to indicate
+ that the data should not be transformed.
+ """
super(TagTemplate, me).__init__(**kw)
me._rel = rel
me._i = i
me._op = op
+
def relations(me):
+ """The TagTemplate knows which relation it uses."""
return set([me._rel])
+
def subst(me, out, cs):
+ """
+ A TagTemplate extracts and transforms an item from the current row of
+ a relation.
+ """
val = cs.get(me._rel, me._i)
if me._op is not None: val = me._op(val)
out.write(val)
+
def __repr__(me):
return '#<TagTemplate %s>' % me._rel._head[me._i]
class SequenceTemplate (BasicTemplate):
+ """
+ A SequenceTemplate concatenates a number of other templates.
+ """
+
def __new__(cls, seq, **kw):
+ """
+ Construct a template from a sequence SEQ of other templates.
+
+ If SEQ is a singleton (which it often is) then return it directly;
+ otherwise construct a SequenceTemplate.
+ """
if len(seq) == 1:
return seq[0]
else:
return super(SequenceTemplate, cls).__new__(cls, seq = seq, **kw)
def __init__(me, seq, **kw):
+ """
+ Initialize a new SequenceTemplate object from SEQ.
+
+ The sequence is flattened out: if SEQ contains SequenceTemplates then we
+ use their children directly, so that we don't have a useless tree.
+ """
super(SequenceTemplate, me).__init__(**kw)
tt = []
cls = type(me)
me._seq = tt
def relations(me):
+ """
+ The relations of a SequenceTemplate are the union of the relations of its
+ children.
+ """
rr = set()
for t in me._seq: rr.update(t.relations())
return rr
+
def subst(me, out, cs):
+ """
+ The output of a SequenceTemplate is the concatenation of the expansions
+ of its children.
+ """
for t in me._seq: t.subst(out, cs)
+
def __repr__(me):
return '#<SequenceTemplate %r>' % me._seq
class RepeatTemplate (BasicTemplate):
+ """
+ A RepeatTemplate iterates its body over a number of relations.
+ """
+
def __init__(me, sub):
+ """
+ Initialize a new RepeatTemplate, given a template to act as its body.
+ """
me._sub = sub
+
def relations(me):
+ """
+ A RepeatTemplate hides the relations of its body.
+ """
return set()
+
def subst(me, out, cs):
+ """
+ Substitute a RepeatTemplate, by iterating over the relations mentioned in
+ its body template.
+ """
rr = me._sub.relations()
for r in rr:
if len(r) == 0: return
me._sub.subst(out, cs)
if not cs.step(): break
cs.pop()
+
def __repr__(me):
return '#<RepeatTemplate %r>' % me._sub
### Some slightly cheesy parsing machinery.
class ParseState (object):
+ """
+ A ParseState object keeps track of a parser's position in a file.
+
+ The `curr' slot contains the current line under consideration.
+ """
+
def __init__(me, file, text):
+ """
+ Initialize a ParseState object.
+
+ The FILE is a string naming the source file, and the TEXT is an iterator
+ over the file's lines.
+ """
me._file = file
me._i = 0
me._it = iter(text.splitlines(True))
me.step()
+
def step(me):
+ """
+ Advance the ParseState to the next line.
+
+ Sets `curr' to the next line, or to None if the input is exhausted.
+ """
try: me.curr = me._it.next()
except StopIteration: me.curr = None
else: me._i += 1
+
def error(me, msg):
+ """
+ Report a fatal error during parsing, attributing it to the current line.
+ """
die('%s:%d: %s' % (me._file, me._i, msg))
class token (object):
+ """
+ A token object has no interesting properties other than its identity.
+ """
+
def __init__(me, name):
+ """Initialize a new token, with the given NAME."""
me._name = name
def __repr__(me):
+ """Return a description of the token, for diagnostic purposes."""
return '#<%s>' % me._name
+## Some magical tokens useful during parsing.
EOF = token('eof')
END = token('end')
+## Regular expressions matching substitution tags.
R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
+## A dictionary mapping operation names to functions which implement them.
OPMAP = {}
def defop(func):
+ """
+ Decorator for substitution operator functions.
+
+ Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
+ name, removing a prefix `op_' if there is one.
+
+ An operator function is given the raw value as an argument and should
+ return the transformed value.
+ """
name = func.func_name
if name.startswith('op_'): name = name[3:]
OPMAP[name] = func
return func
@defop
-def op_u(val): return val.upper()
+def op_u(val):
+ """@{COLUMN:u} -- the item in upper case."""
+ return val.upper()
@defop
-def op_l(val): return val.lower()
+def op_l(val):
+ """@{COLUMN:l} -- the item in upper case."""
+ return val.lower()
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
@defop
-def op_c(val): return R_NOTIDENT.sub('_', val)
+def op_c(val):
+ """
+ @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
+ """
+ return R_NOTIDENT.sub('_', val)
def _pairify(val):
+ """
+ Split VAL into two, at an `=' sign.
+
+ If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
+ otherwise return (VAL, VAL).
+ """
c = val.find('=')
if c >= 0: return val[:c], val[c + 1:]
else: return val, val
@defop
-def op_left(val): return _pairify(val)[0]
+def op_left(val):
+ """@{COLUMN:left} -- the left-hand side of the item."""
+ return _pairify(val)[0]
@defop
-def op_right(val): return _pairify(val)[1]
+def op_right(val):
+ """@{COLUMN:right} -- the left-hand side of the item."""
+ return _pairify(val)[1]
def parse_text(ps):
+ """
+ Parse a chunk of text from a ParseState.
+
+ Stop when we get to something which looks like a template keyword, but
+ extract tags. Return the resulting template.
+
+ Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'. The text may
+ contain comments beginning `%#', which are ignored, and lines beginning
+ `%%' which have the initial `%' removed and are otherwise treated as normal
+ text (and, in particular, may contain tags). Other lines beginning with
+ `%' are directives and must be processed by our caller.
+ """
+
+ ## Starting out: no templates collected, and an empty buffer of literal
+ ## text.
tt = []
lit = StringIO()
+
def spill():
+ ## Spill accumulated literal text from `lit' into a LiteralTemplate
+ ## object.
l = lit.getvalue()
if l: tt.append(LiteralTemplate(l))
lit.reset()
lit.truncate()
+
+ ## Iterate over the lines of input.
while True:
line = ps.curr
+
+ ## Stop if there's no more text; handle lines beginning with `%'.
if line is None: break
elif line.startswith('%'):
if line.startswith('%#'): ps.step(); continue
elif line.startswith('%%'): line = line[1:]
else: break
+
+ ## Work through the line, finding tags.
i = 0
while True:
+
+ ## If there are no more `@' signs, there can be no more tags, and we're
+ ## done.
j = line.find('@', i)
if j < 0: break
+
+ ## Write the chunk we've found.
lit.write(line[i:j])
+
+ ## If the next character is also `@' then this is an escape and we
+ ## should carry on.
+ if line[j:].startswith('@@'):
+ lit.write('@')
+ i = j + 2
+ continue
+
+ ## Parse the tag into a column name, and maybe some operators.
m = R_SIMPLETAG.match(line, j)
if not m: m = R_COMPLEXTAG.match(line, j)
if not m: ps.error('invalid tag')
try: rel, i = COLMAP[col]
except KeyError: ps.error("unknown column `%s'" % col)
ops = m.lastindex >= 2 and m.group(2)
+
+ ## If we have operators then look them up and compose them.
wholeop = None
if ops:
for opname in ops[1:].split(':'):
except KeyError: ps.error("unknown operation `%s'" % opname)
if wholeop is None: wholeop = op
else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
+
+ ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
+ ## for the tag.
spill()
tt.append(TagTemplate(rel, i, wholeop))
+
+ ## Continue from after the tag.
i = m.end()
+
+ ## Finished a line. Write out the remainder of the line and move onto
+ ## the next.
lit.write(line[i:])
ps.step()
+
+ ## Run out of things to do. Flush out the rest of the literal text and
+ ## combine the templates.
spill()
return SequenceTemplate(tt)
+## A dictionary mapping regular expressions to directive-processing functions.
DIRECT = []
def direct(rx):
+ """
+ Function decorator for template file directives.
+
+ Associate the regular expression RX with the function in `DIRECT'.
+ Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
+ and M is the match object resulting from matching RX against the directive
+ text.
+ """
def _(func):
DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
return func
return _
def parse_template(ps):
+ """
+ Parse a single template from the ParseState PS.
+
+ A single template is either a chunk of text (parsed by `parse_text') or a
+ directive (handled by the appropriate function in `DIRECT').
+
+ Returns either a template object, or a special token. In particular, `EOF'
+ is returned if we run out of text; directives may return other tokens.
+ """
+
+ ## Skip initial comments. Otherwise we might end up with an empty
+ ## SequenceTemplate here.
while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
+
+ ## If we've run out of input, return `EOF' here. A line beginning `%%', or
+ ## not beginning `%', means we've found a chunk of text. Otherwise find
+ ## the right directive handler.
if ps.curr is None: return EOF
elif ps.curr.startswith('%'):
if ps.curr.startswith('%%'): return parse_text(ps)
return parse_text(ps)
def parse_templseq(ps, nestp):
+ """
+ Parse a sequence of templates from the ParseState PS.
+
+ Calls `parse_template' repeatedly If NESTP is true, then an `END' token
+ (presumably from a directive handler) is permitted and halts parsing;
+ otherwise `END' signifies an error.
+
+ Returns a template object.
+ """
+
tt = []
while True:
t = parse_template(ps)
@direct(r'repeat')
def dir_repeat(ps, m):
+ """
+ %repeat
+ BODY
+ %end
+
+ Iterate the body over the cartesian product of the relations mentioned
+ within.
+ """
return RepeatTemplate(parse_templseq(ps, True))
@direct(r'end')
def dir_end(ps, m):
+ """%end -- an end marker used to delimet chunks of template."""
return END
def compile_template(file, text):
+ """
+ Compile TEXT into a template, attributing errors to FILE.
+ """
ps = ParseState(file, text)
t = parse_templseq(ps, False)
return t
filetempl = compile_template('<output>', filepat)
def filenames(filetempl):
+ """
+ Generate the filenames in the compiled filename template FILETEMPL.
+ """
cs = CursorSet()
rr = filetempl.relations()
for r in rr:
if not cs.step(): break
cs.pop()
+## Main dispatch.
if opts.mode == 'list':
for file, cs in filenames(filetempl): print file
elif opts.mode == 'gen':