###--------------------------------------------------------------------------
### Utilities.
-QUIS = OS.path.basename(argv[0])
+QUIS = OS.path.basename(argv[0]) # Program name, for use in errors.
def die(msg):
+ """Report MSG as a fatal error, and exit."""
stderr.write('%s: %s\n' % (QUIS, msg))
exit(1)
def indexed(seq):
+ """
+ Generate pairs (I, X), where I counts from zero and X are the items of SEQ.
+ """
return IT.izip(IT.count(), seq)
###--------------------------------------------------------------------------
### Reading the input values.
+## Map column names to (Relation, column-index) pairs.
COLMAP = {}
class Cursor (object):
+ """
+ A Cursor object keeps track of an iteration through a Relation.
+
+ At any time, the Cursor has a `current' row; the individual cells of this
+ row may be retrieved using Python's standard indexing operator. The `step'
+ method advances to the next row (if there is one). The `reset' method
+ returns to row zero.
+ """
+
def __init__(me, rel):
+ """
+ Initialize a new Cursor object, tracking its way through a Relation REL.
+
+ The new Cursor has row zero as its current row. The REL must not be
+ empty.
+ """
me._rel = rel
me.reset()
+
def step(me):
+ """
+ Advance the Cursor to the next row.
+
+ Returns False if there is no next row; otherwise True.
+ """
me._i += 1
if me._i >= len(me._rel):
me._i = me._row = None
return False
me._row = me._rel[me._i]
return True
+
def reset(me):
+ """
+ Reset the Cursor, so that row zero is current again.
+ """
me._i = 0
me._row = me._rel[0]
+
def __getitem__(me, i):
+ """
+ Return the item in column I of the Cursor's current row.
+
+ The index must be acceptable to the underlying row object, but otherwise
+ the Cursor imposes no restrictions. Indices need not be numeric, for
+ example.
+ """
return me._row[i]
+
def __repr__(me):
+ """
+ Return a text description of the Cursor, for diagnostic use.
+ """
return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
class CursorSet (object):
+ """
+ A CursorSet iterates over the cartesian product of a number of Relations.
+
+ More precisely: it maintains a stack, each level of which tracks a number
+ of Relations. More Relations can be pushed onto this stack with the `push'
+ method, and removed with `pop'. The `step' method advances through the
+ cartesian product of the Relations in the top level of the stack -- the
+ `active' Relations. Columns from the current rows of all of the currently
+ known Relations -- whether active or not -- can be extracted using `get'.
+ """
+
def __init__(me):
+ """
+ Initialize a new CursorSet object.
+
+ A new CursorSet has an empty stack.
+ """
me._map = {}
me._stack = []
me._act = None
+
def push(me, rels):
+ """
+ Push the new Relations RELS onto the stack and start iterating.
+
+ The currently active Relations are pushed down. Those Relations which are
+ not already known to the CursorSet become the newly active collection.
+ (Relations which are already known are simply ignored.)
+
+ Iteration traverses Relations on the right more rapidly.
+ """
cc = []
rr = []
for r in rels:
cc.append(c)
me._stack.append((me._act, rr))
me._act = cc
+
def step(me):
+ """
+ Advance the CursorSet through the currently active Relations.
+
+ Return False if the active Relations have now been exhausted; otherwise
+ return True.
+ """
i = 0
while i < len(me._act):
if me._act[i].step(): return True
me._act[i].reset()
i += 1
return False
+
def pop(me):
+ """
+ Pop the active Relations.
+
+ Return to iterating over the previously active collection.
+ """
me._act, rels = me._stack.pop()
for r in rels: del me._map[r]
+
def get(me, rel, i):
+ """
+ Return the item with index I in the current row of Relation REL.
+ """
return me._map[rel][i]
class Relation (object):
+ """
+ A Relation keeps track of a table of data.
+
+ A Relation consists of a `header', which is a sequence of string names,
+ and a rectangular array of data, each row of which has the same number of
+ items as the header.
+
+ Relations can be iterated over using Cursors and CursorSets.
+ """
+
def __init__(me, head):
+ """
+ Initialize a new, empty Relation with header HEAD.
+
+ The `COLMAP' dictionary is updated to map the names in the header to this
+ Relation and its column indices.
+ """
me._head = head
me._rows = []
for i, c in indexed(head): COLMAP[c] = me, i
+
def addrow(me, row):
+ """
+ Add a ROW to the Relation.
+
+ The new row must have the correct number of entries.
+ """
if len(row) != len(me._head):
die("mismatch: row `%s' doesn't match heading `%s'" %
(', '.join(row), ', '.join(me._head)))
me._rows.append(row)
+
def __len__(me):
+ """Return the number of rows in the Relation."""
return len(me._rows)
+
def __getitem__(me, i):
+ """Return the Ith row of the Relation."""
return me._rows[i]
+
def __repr__(me):
+ """Return a textual description of the Relation, for diagnostic use."""
return '#<Relation %r>' % me._head
def read_immediate(word):
+ """
+ Return a Relation constructed by parsing WORD.
+
+ The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
+ comma-separated lists of strings which will form the relation's header and
+ rows respectively. There is no way to include an item which contains a
+ comma or whitespace.
+ """
head, rels = word.split('=', 1)
rel = Relation([c.strip() for c in head.split(',')])
for row in rels.split(): rel.addrow([c.strip() for c in row.split(',')])
def read_file(spec):
+ """
+ Return a Relation constructed from a file, according to SPEC.
+
+ The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
+ comma-separated list of strings to form the relation's header. Each line
+ from the file which is neither empty nor begins with `#' is split into
+ whitespace-separated words to form a row in the relation. There is no way
+ to include an item which contains whitespace.
+ """
file, head = spec.split(':', 1)
rel = Relation([c.strip() for c in head.split(',')])
with open(file) as f:
rel.addrow(line.split())
def read_thing(spec):
+ """
+ Return a relation constructed from SPEC.
+
+ If SPEC begins with `@' then read the relation from a file (see
+ `read_file'); otherwise interpret it as immediate data (see
+ `read_immediate').
+ """
if spec.startswith('@'): read_file(spec[1:])
else: read_immediate(spec)
### Template structure.
class BasicTemplate (object):
+ """
+ Base class for template objects.
+
+ The protocol for templates consists of two methods:
+
+ relations() Return a set of Relations mentioned at top-level in
+ substitutions in the template.
+
+ subst(OUT, CS) Fill in the template, writing the output to the
+ stream OUT. The CS is a CursorSet object tracking
+ the current iteration state.
+ """
pass
class LiteralTemplate (BasicTemplate):
+ """
+ A LiteralTemplate outputs a fixed string.
+ """
+
def __init__(me, text, **kw):
+ """
+ Initialize a new LiteralTemplate object. TEXT is the text to be written.
+ """
super(LiteralTemplate, me).__init__(**kw)
me._text = text
+
def relations(me):
+ """A LiteralTemplate contains no substitutions."""
return set()
+
def subst(me, out, cs):
+ """A LiteralTemplate just emits its text."""
out.write(me._text)
+
def __repr__(me):
return '#<LiteralTemplate %r>' % me._text
class TagTemplate (BasicTemplate):
+ """
+ A TagTemplate object expands a substitution tag.
+
+ It extracts an item from the current row of a relation, processes it
+ according to an operation, and outputs the result.
+ """
+
def __init__(me, rel, i, op, **kw):
+ """
+ Initialize a new TagTemplate object.
+
+ REL is the relation from which to pick the output; I is the column index;
+ OP is a transformation to apply to the data, and may be None to indicate
+ that the data should not be transformed.
+ """
super(TagTemplate, me).__init__(**kw)
me._rel = rel
me._i = i
me._op = op
+
def relations(me):
+ """The TagTemplate knows which relation it uses."""
return set([me._rel])
+
def subst(me, out, cs):
+ """
+ A TagTemplate extracts and transforms an item from the current row of
+ a relation.
+ """
val = cs.get(me._rel, me._i)
if me._op is not None: val = me._op(val)
out.write(val)
+
def __repr__(me):
return '#<TagTemplate %s>' % me._rel._head[me._i]
class SequenceTemplate (BasicTemplate):
+ """
+ A SequenceTemplate concatenates a number of other templates.
+ """
+
def __new__(cls, seq, **kw):
+ """
+ Construct a template from a sequence SEQ of other templates.
+
+ If SEQ is a singleton (which it often is) then return its only element
+ directly; otherwise construct a SequenceTemplate.
+ """
if len(seq) == 1:
return seq[0]
else:
return super(SequenceTemplate, cls).__new__(cls, seq = seq, **kw)
def __init__(me, seq, **kw):
+ """
+ Initialize a new SequenceTemplate object from SEQ.
+
+ The sequence is flattened out: if SEQ contains SequenceTemplates then we
+ use their children directly, so that we don't have a useless tree.
+ """
super(SequenceTemplate, me).__init__(**kw)
tt = []
cls = type(me)
me._seq = tt
def relations(me):
+ """
+ The relations of a SequenceTemplate are the union of the relations of its
+ children.
+ """
rr = set()
for t in me._seq: rr.update(t.relations())
return rr
+
def subst(me, out, cs):
+ """
+ The output of a SequenceTemplate is the concatenation of the expansions
+ of its children.
+ """
for t in me._seq: t.subst(out, cs)
+
def __repr__(me):
return '#<SequenceTemplate %r>' % me._seq
class RepeatTemplate (BasicTemplate):
+ """
+ A RepeatTemplate iterates its body over a number of relations.
+ """
+
def __init__(me, sub):
+ """
+ Initialize a new RepeatTemplate, given a template to act as its body.
+ """
me._sub = sub
+
def relations(me):
+ """
+ A RepeatTemplate hides the relations of its body.
+ """
return set()
+
def subst(me, out, cs):
+ """
+ Substitute a RepeatTemplate, by iterating over the relations mentioned in
+ its body template.
+ """
rr = me._sub.relations()
for r in rr:
if len(r) == 0: return
me._sub.subst(out, cs)
if not cs.step(): break
cs.pop()
+
def __repr__(me):
return '#<RepeatTemplate %r>' % me._sub
### Some slightly cheesy parsing machinery.
class ParseState (object):
+ """
+ A ParseState object keeps track of a parser's position in a file.
+
+ The `curr' slot contains the current line under consideration.
+ """
+
def __init__(me, file, text):
+ """
+ Initialize a ParseState object.
+
+ The FILE is a string naming the source file, and the TEXT is a string
+ holding the file's contents, which is split into lines here.
+ """
me._file = file
me._i = 0
me._it = iter(text.splitlines(True))
me.step()
+
def step(me):
+ """
+ Advance the ParseState to the next line.
+
+ Sets `curr' to the next line, or to None if the input is exhausted.
+ """
try: me.curr = me._it.next()
except StopIteration: me.curr = None
else: me._i += 1
+
def error(me, msg):
+ """
+ Report a fatal error during parsing, attributing it to the current line.
+ """
die('%s:%d: %s' % (me._file, me._i, msg))
class token (object):
+ """
+ A token object has no interesting properties other than its identity.
+ """
+
def __init__(me, name):
+ """Initialize a new token, with the given NAME."""
me._name = name
def __repr__(me):
+ """Return a description of the token, for diagnostic purposes."""
return '#<%s>' % me._name
+## Some magical tokens useful during parsing.
EOF = token('eof')
END = token('end')
+## Regular expressions matching substitution tags.
R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
+## A dictionary mapping operation names to functions which implement them.
OPMAP = {}
def defop(func):
+ """
+ Decorator for substitution operator functions.
+
+ Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
+ name, removing a prefix `op_' if there is one.
+
+ An operator function is given the raw value as an argument and should
+ return the transformed value.
+ """
name = func.func_name
if name.startswith('op_'): name = name[3:]
OPMAP[name] = func
return func
@defop
-def op_u(val): return val.upper()
+def op_u(val):
+ """@{COLUMN:u} -- the item in upper case."""
+ return val.upper()
@defop
-def op_l(val): return val.lower()
+def op_l(val):
+ """@{COLUMN:l} -- the item in lower case."""
+ return val.lower()
+
+@defop
+def op_f(val):
+ """@{COLUMN:f} -- the item, with `/' characters replaced by `-'."""
+ return val.replace('/', '-')
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
@defop
-def op_c(val): return R_NOTIDENT.sub('_', val)
+def op_c(val):
+ """
+ @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
+ """
+ return R_NOTIDENT.sub('_', val)
def _pairify(val):
+ """
+ Split VAL into two, at an `=' sign.
+
+ If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
+ otherwise return (VAL, VAL).
+ """
c = val.find('=')
if c >= 0: return val[:c], val[c + 1:]
else: return val, val
@defop
-def op_left(val): return _pairify(val)[0]
+def op_left(val):
+ """@{COLUMN:left} -- the left-hand side of the item."""
+ return _pairify(val)[0]
@defop
-def op_right(val): return _pairify(val)[1]
+def op_right(val):
+ """@{COLUMN:right} -- the right-hand side of the item."""
+ return _pairify(val)[1]
def parse_text(ps):
+ """
+ Parse a chunk of text from a ParseState.
+
+ Stop when we get to something which looks like a template keyword, but
+ extract tags. Return the resulting template.
+
+ Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'. The text may
+ contain comments beginning `%#', which are ignored, and lines beginning
+ `%%' which have the initial `%' removed and are otherwise treated as normal
+ text (and, in particular, may contain tags). Other lines beginning with
+ `%' are directives and must be processed by our caller.
+ """
+
+ ## Starting out: no templates collected, and an empty buffer of literal
+ ## text.
tt = []
lit = StringIO()
+
def spill():
+ ## Spill accumulated literal text from `lit' into a LiteralTemplate
+ ## object.
l = lit.getvalue()
if l: tt.append(LiteralTemplate(l))
lit.reset()
lit.truncate()
+
+ ## Iterate over the lines of input.
while True:
line = ps.curr
+
+ ## Stop if there's no more text; handle lines beginning with `%'.
if line is None: break
elif line.startswith('%'):
if line.startswith('%#'): ps.step(); continue
elif line.startswith('%%'): line = line[1:]
else: break
+
+ ## Work through the line, finding tags.
i = 0
while True:
+
+ ## If there are no more `@' signs, there can be no more tags, and we're
+ ## done.
j = line.find('@', i)
if j < 0: break
+
+ ## Write the chunk we've found.
lit.write(line[i:j])
+
+ ## If the next character is also `@' then this is an escape and we
+ ## should carry on.
+ if line[j:].startswith('@@'):
+ lit.write('@')
+ i = j + 2
+ continue
+
+ ## Parse the tag into a column name, and maybe some operators.
m = R_SIMPLETAG.match(line, j)
if not m: m = R_COMPLEXTAG.match(line, j)
if not m: ps.error('invalid tag')
try: rel, i = COLMAP[col]
except KeyError: ps.error("unknown column `%s'" % col)
ops = m.lastindex >= 2 and m.group(2)
+
+ ## If we have operators then look them up and compose them.
wholeop = None
if ops:
for opname in ops[1:].split(':'):
except KeyError: ps.error("unknown operation `%s'" % opname)
if wholeop is None: wholeop = op
else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
+
+ ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
+ ## for the tag.
spill()
tt.append(TagTemplate(rel, i, wholeop))
+
+ ## Continue from after the tag.
i = m.end()
+
+ ## Finished a line. Write out the remainder of the line and move onto
+ ## the next.
lit.write(line[i:])
ps.step()
+
+ ## Run out of things to do. Flush out the rest of the literal text and
+ ## combine the templates.
spill()
return SequenceTemplate(tt)
+## A list of (compiled regular expression, directive-processing function) pairs.
DIRECT = []
def direct(rx):
+ """
+ Function decorator for template file directives.
+
+ Associate the regular expression RX with the function in `DIRECT'.
+ Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
+ and M is the match object resulting from matching RX against the directive
+ text.
+ """
def _(func):
DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
return func
return _
def parse_template(ps):
+ """
+ Parse a single template from the ParseState PS.
+
+ A single template is either a chunk of text (parsed by `parse_text') or a
+ directive (handled by the appropriate function in `DIRECT').
+
+ Returns either a template object, or a special token. In particular, `EOF'
+ is returned if we run out of text; directives may return other tokens.
+ """
+
+ ## Skip initial comments. Otherwise we might end up with an empty
+ ## SequenceTemplate here.
while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
+
+ ## If we've run out of input, return `EOF' here. A line beginning `%%', or
+ ## not beginning `%', means we've found a chunk of text. Otherwise find
+ ## the right directive handler.
if ps.curr is None: return EOF
elif ps.curr.startswith('%'):
if ps.curr.startswith('%%'): return parse_text(ps)
return parse_text(ps)
def parse_templseq(ps, nestp):
+ """
+ Parse a sequence of templates from the ParseState PS.
+
+ Calls `parse_template' repeatedly. If NESTP is true, then an `END' token
+ (presumably from a directive handler) is permitted and halts parsing;
+ otherwise `END' signifies an error.
+
+ Returns a template object.
+ """
+
tt = []
while True:
t = parse_template(ps)
@direct(r'repeat')
def dir_repeat(ps, m):
+ """
+ %repeat
+ BODY
+ %end
+
+ Iterate the body over the cartesian product of the relations mentioned
+ within.
+ """
return RepeatTemplate(parse_templseq(ps, True))
@direct(r'end')
def dir_end(ps, m):
+ """%end -- an end marker used to delimit chunks of template."""
return END
def compile_template(file, text):
+ """
+ Compile TEXT into a template, attributing errors to FILE.
+ """
ps = ParseState(file, text)
t = parse_templseq(ps, False)
return t
op = OP.OptionParser(
description = 'Generates files by filling in simple templates',
- usage = 'usage: %prog [-gl] FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
+ usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
version = 'Catacomb version @VERSION@')
+def cb_gen(opt, optstr, arg, op):
+ op.values.input = arg
+ op.values.mode = 'gen'
for short, long, kw in [
('-l', '--list', dict(
action = 'store_const', const = 'list', dest = 'mode',
help = 'list filenames generated')),
('-g', '--generate', dict(
- action = 'store', metavar = 'PATH', dest = 'input',
- help = 'generate output (default)'))]:
+ action = 'callback', metavar = 'TEMPLATE',
+ callback = cb_gen, type = 'string',
+ help = 'generate file(s) from TEMPLATE file'))]:
op.add_option(short, long, **kw)
-op.set_defaults(mode = 'gen')
+op.set_defaults(mode = 'what?')
opts, args = op.parse_args()
if len(args) < 1: op.error('missing FILE')
filetempl = compile_template('<output>', filepat)
def filenames(filetempl):
+ """
+ Generate the filenames in the compiled filename template FILETEMPL.
+ """
cs = CursorSet()
rr = filetempl.relations()
for r in rr:
if not cs.step(): break
cs.pop()
+## Main dispatch.
if opts.mode == 'list':
for file, cs in filenames(filetempl): print file
elif opts.mode == 'gen':
templ.subst(out, cs)
OS.rename(new, file)
else:
- raise Exception, 'What am I doing here?'
+ die('What am I doing here?')
###----- That's all, folks --------------------------------------------------