3 ### Generate files by filling in simple templates
5 ### (c) 2013 Straylight/Edgeware
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of Catacomb.
12 ### Catacomb is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU Library General Public License as
14 ### published by the Free Software Foundation; either version 2 of the
15 ### License, or (at your option) any later version.
17 ### Catacomb is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU Library General Public License for more details.
22 ### You should have received a copy of the GNU Library General Public
23 ### License along with Catacomb; if not, write to the Free
24 ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 ### MA 02111-1307, USA.
27 from __future__ import with_statement
29 import itertools as IT
34 if SYS.version_info >= (3,): from io import StringIO
35 else: from cStringIO import StringIO
36 from sys import argv, exit, stderr
38 ###--------------------------------------------------------------------------
41 QUIS = OS.path.basename(argv[0]) # Program name, for use in errors.
44 """Report MSG as a fatal error, and exit."""
45 stderr.write('%s: %s\n' % (QUIS, msg))
def indexed(seq):
  """
  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.

  SEQ may be any iterable.  The pairs are produced lazily by the built-in
  `enumerate', which is available on both Python 2 and Python 3 (unlike
  `itertools.izip', which exists only on Python 2).
  """
  return enumerate(seq)
54 if SYS.version_info >= (3,):
55 def func_name(func): return func.__name__
58 def func_name(func): return func.func_name
62 def next(obj): return obj.next()
64 ###--------------------------------------------------------------------------
65 ### Reading the input values.
67 ## Map column names to (Relation, # index) pairs.
70 class Cursor (object):
72 A Cursor object keeps track of an iteration through a Relation.
74 At any time, the Cursor has a `current' row; the individual cells of this
75 row may be retrieved using Python's standard indexing operator. The `step'
76 method advances to the next row (if there is one). The `reset' method
80 def __init__(me, rel):
82 Initialize a new Cursor object, tracking its way through a Relation REL.
84 The new Cursor has row zero as its current row. The REL must not be
92 Advance the Cursor to the next row.
94 Returns False if there is no next row; otherwise True.
97 if me._i >= len(me._rel):
98 me._i = me._row = None
100 me._row = me._rel[me._i]
105 Reset the Cursor, so that row zero is current again.
110 def __getitem__(me, i):
112 Return the item in column I of the Cursor's current row.
114 The index must be acceptable to the underlying row object, but otherwise
115 the Cursor imposes no restrictions. Indices need not be numeric, for
122 Return a text description of the Cursor, for diagnostic use.
124 return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
126 class CursorSet (object):
128 A CursorSet iterates over the cartesian product of a number of Relations.
130 More precisely: it maintains a stack, each level of which tracks a number
131 of Relations. More Relations can be pushed onto this stack with the `push'
132 method, and removed with `pop'. The `step' method advances through the
133 cartesian product of the Relations in the top level of the stack -- the
134 `active' Relations. Columns from the current rows of all of the currently
135 known Relations -- whether active or not -- can be extracted using `get'.
140 Initialize a new CursorSet object.
142 A new CursorSet has an empty stack.
150 Push the new Relations RELS onto the stack and start iterating.
152 The currently active Relations are pushed down. Those Relations which are
153 not already known to the CursorSet become the newly active collection.
154 (Relations which are already known are simply ignored.)
156 Iteration traverses Relations on the right more rapidly.
161 if r in me._map: continue
162 c = me._map[r] = Cursor(r)
165 me._stack.append((me._act, rr))
170 Advance the CursorSet through the currently active Relations.
172 Return False if the active Relations have now been exhausted; otherwise
176 while i < len(me._act):
177 if me._act[i].step(): return True
178 if i >= len(me._act): return False
185 Pop the active Relations.
187 Return to iterating over the previously active collection.
189 me._act, rels = me._stack.pop()
190 for r in rels: del me._map[r]
194 Return the item with index I in the current row of Relation REL.
196 return me._map[rel][i]
198 class Relation (object):
200 A Relation keeps track of a table of data.
202 A Relation consists of a `header', which is a sequence of string names,
203 and a rectangular array of data, each row of which has the same number of
206 Relations can be iterated over using Cursors and CursorSets.
209 def __init__(me, head):
211 Initialize a new, empty Relation with header HEAD.
213 The `COLMAP' dictionary is updated to map the names in the header to this
214 Relation and its column indices.
218 for i, c in indexed(head): COLMAP[c] = me, i
222 Add a ROW to the Relation.
224 The new row must have the correct number of entries.
226 if len(row) != len(me._head):
227 die("mismatch: row `%s' doesn't match heading `%s'" %
228 (', '.join(row), ', '.join(me._head)))
232 """Return the number of rows in the Relation."""
235 def __getitem__(me, i):
236 """Return the Ith row of the Relation."""
240 """Return a textual description of the Relation, for diagnostic use."""
241 return '#<Relation %r>' % me._head
def read_immediate(word):
  """
  Return a Relation constructed by parsing WORD.

  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
  comma-separated lists of strings which will form the relation's header and
  rows respectively.  Rows are separated by whitespace and items by commas,
  so there is no way to include an item which contains either; whitespace
  around each item is stripped.

  Only the first `=' separates the header from the rows, so later `=' signs
  stay in the row items.  A WORD with no `=' at all raises ValueError.
  """
  head, rows = word.split('=', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  for row in rows.split(): rel.addrow([c.strip() for c in row.split(',')])

  ## Hand the relation back, as the documentation has always promised.
  return rel
258 Return a Relation constructed from a file, according to SPEC.
260 The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
261 comma-separated list of strings to form the relation's header. Each line
262 from the file which is neither empty nor begins with `#' is split into
263 whitespace-separated words to form a row in the relation. There is no way
264 to include an item which contains whitespace.
266 file, head = spec.split(':', 1)
267 rel = Relation([c.strip() for c in head.split(',')])
268 with open(file) as f:
271 if line.startswith('#') or line == '': continue
272 rel.addrow(line.split())
def read_thing(spec):
  """
  Return a relation constructed from SPEC.

  If SPEC begins with `@' then read the relation from a file (see
  `read_file'), passing it the remainder of SPEC; otherwise interpret SPEC
  as immediate data (see `read_immediate').  Whatever the chosen reader
  returns is passed back to the caller unchanged.
  """
  if spec.startswith('@'): return read_file(spec[1:])
  else: return read_immediate(spec)
285 ###--------------------------------------------------------------------------
286 ### Template structure.
288 class BasicTemplate (object):
290 Base class for template objects.
292 The protocol for templates consists of two methods:
294 relations() Return a set of Relations mentioned at top-level in
295 substitutions in the template.
297 subst(OUT, CS) Fill in the template, writing the output to the
298 stream OUT. The CS is a CursorSet object tracking
299 the current iteration state.
303 class LiteralTemplate (BasicTemplate):
305 A LiteralTemplate outputs a fixed string.
308 def __init__(me, text, **kw):
310 Initialize a new LiteralTemplate object. TEXT is the text to be written.
312 super(LiteralTemplate, me).__init__(**kw)
316 """A LiteralTemplate contains no substitutions."""
319 def subst(me, out, cs):
320 """A LiteralTemplate just emits its text."""
324 return '#<LiteralTemplate %r>' % me._text
326 class TagTemplate (BasicTemplate):
328 A TagTemplate object expands a substitution tag.
330 It extracts an item from the current row of a relation, processes it
331 according to an operation, and outputs the result.
334 def __init__(me, rel, i, op, **kw):
336 Initialize a new TagTemplate object.
338 REL is the relation from which to pick the output; I is the column index;
339 OP is a transformation to apply to the data, and may be None to indicate
340 that the data should not be transformed.
342 super(TagTemplate, me).__init__(**kw)
348 """The TagTemplate knows which relation it uses."""
349 return set([me._rel])
351 def subst(me, out, cs):
353 A TagTemplate extracts and transforms an item from the current row of
356 val = cs.get(me._rel, me._i)
357 if me._op is not None: val = me._op(val)
361 return '#<TagTemplate %s>' % me._rel._head[me._i]
363 class SequenceTemplate (BasicTemplate):
365 A SequenceTemplate concatenates a number of other templates.
368 def __new__(cls, seq, **kw):
370 Construct a template from a sequence SEQ of other templates.
372 If SEQ is a singleton (which it often is) then return it directly;
373 otherwise construct a SequenceTemplate.
378 return super(SequenceTemplate, cls).__new__(cls, **kw)
380 def __init__(me, seq, **kw):
382 Initialize a new SequenceTemplate object from SEQ.
384 The sequence is flattened out: if SEQ contains SequenceTemplates then we
385 use their children directly, so that we don't have a useless tree.
387 super(SequenceTemplate, me).__init__(**kw)
391 if isinstance(t, cls): tt += t._seq
397 The relations of a SequenceTemplate are the union of the relations of its
401 for t in me._seq: rr.update(t.relations())
404 def subst(me, out, cs):
406 The output of a SequenceTemplate is the concatenation of the expansions
409 for t in me._seq: t.subst(out, cs)
412 return '#<SequenceTemplate %r>' % me._seq
414 class RepeatTemplate (BasicTemplate):
416 A RepeatTemplate iterates its body over a number of relations.
419 def __init__(me, sub):
421 Initialize a new RepeatTemplate, given a template to act as its body.
427 A RepeatTemplate hides the relations of its body.
431 def subst(me, out, cs):
433 Substitute a RepeatTemplate, by iterating over the relations mentioned in
436 rr = me._sub.relations()
438 if len(r) == 0: return
441 me._sub.subst(out, cs)
442 if not cs.step(): break
446 return '#<RepeatTemplate %r>' % me._sub
448 ###--------------------------------------------------------------------------
449 ### Some slightly cheesy parsing machinery.
451 class ParseState (object):
453 A ParseState object keeps track of a parser's position in a file.
455 The `curr' slot contains the current line under consideration.
458 def __init__(me, file, text):
460 Initialize a ParseState object.
462 The FILE is a string naming the source file, and the TEXT is a string
463 holding the file's contents, which is split into lines here.
467 me._it = iter(text.splitlines(True))
472 Advance the ParseState to the next line.
474 Sets `curr' to the next line, or to None if the input is exhausted.
476 try: me.curr = next(me._it)
477 except StopIteration: me.curr = None
482 Report a fatal error during parsing, attributing it to the current line.
484 die('%s:%d: %s' % (me._file, me._i, msg))
486 class token (object):
488 A token object has no interesting properties other than its identity.
491 def __init__(me, name):
492 """Initialize a new token, with the given NAME."""
495 """Return a description of the token, for diagnostic purposes."""
496 return '#<%s>' % me._name
498 ## Some magical tokens useful during parsing.
502 ## Regular expressions matching substitution tags.
503 R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
504 R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
506 ## A dictionary mapping operation names to functions which implement them.
511 Decorator for substitution operator functions.
513 Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
514 name, removing a prefix `op_' if there is one.
516 An operator function is given the raw value as an argument and should
517 return the transformed value.
519 name = func_name(func)
520 if name.startswith('op_'): name = name[3:]
526 """@{COLUMN:u} -- the item in upper case."""
531 """@{COLUMN:l} -- the item in lower case."""
536 """@{COLUMN:f} -- the item, with `/' characters replaced by `-'."""
537 return val.replace('/', '-')
539 R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
543 @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
545 return R_NOTIDENT.sub('_', val)
549 Split VAL into two, at an `=' sign.
551 If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
552 otherwise return (VAL, VAL).
555 if c >= 0: return val[:c], val[c + 1:]
556 else: return val, val
560 """@{COLUMN:left} -- the left-hand side of the item."""
561 return _pairify(val)[0]
564 """@{COLUMN:right} -- the right-hand side of the item."""
565 return _pairify(val)[1]
569 Parse a chunk of text from a ParseState.
571 Stop when we get to something which looks like a template keyword, but
572 extract tags. Return the resulting template.
574 Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'. The text may
575 contain comments beginning `%#', which are ignored, and lines beginning
576 `%%' which have the initial `%' removed and are otherwise treated as normal
577 text (and, in particular, may contain tags). Other lines beginning with
578 `%' are directives and must be processed by our caller.
581 ## Starting out: no templates collected, and an empty buffer of literal
587 ## Spill accumulated literal text from `lit' into a LiteralTemplate
590 if l: tt.append(LiteralTemplate(l))
594 ## Iterate over the lines of input.
598 ## Stop if there's no more text; handle lines beginning with `%'.
599 if line is None: break
600 elif line.startswith('%'):
601 if line.startswith('%#'): ps.step(); continue
602 elif line.startswith('%%'): line = line[1:]
605 ## Work through the line, finding tags.
609 ## If there are no more `@' signs, there can be no more tags, and we're
611 j = line.find('@', i)
614 ## Write the chunk we've found.
617 ## If the next character is also `@' then this is an escape and we
619 if line[j:].startswith('@@'):
624 ## Parse the tag into a column name, and maybe some operators.
625 m = R_SIMPLETAG.match(line, j)
626 if not m: m = R_COMPLEXTAG.match(line, j)
627 if not m: ps.error('invalid tag')
629 try: rel, i = COLMAP[col]
630 except KeyError: ps.error("unknown column `%s'" % col)
631 ops = m.lastindex >= 2 and m.group(2)
633 ## If we have operators then look them up and compose them.
636 for opname in ops[1:].split(':'):
637 try: op = OPMAP[opname]
638 except KeyError: ps.error("unknown operation `%s'" % opname)
639 if wholeop is None: wholeop = op
640 else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
642 ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
645 tt.append(TagTemplate(rel, i, wholeop))
647 ## Continue from after the tag.
650 ## Finished a line. Write out the remainder of the line and move onto
655 ## Run out of things to do. Flush out the rest of the literal text and
656 ## combine the templates.
658 return SequenceTemplate(tt)
660 ## A dictionary mapping regular expressions to directive-processing functions.
665 Function decorator for template file directives.
667 Associate the regular expression RX with the function in `DIRECT'.
668 Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
669 and M is the match object resulting from matching RX against the directive
673 DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
677 def parse_template(ps):
679 Parse a single template from the ParseState PS.
681 A single template is either a chunk of text (parsed by `parse_text') or a
682 directive (handled by the appropriate function in `DIRECT').
684 Returns either a template object, or a special token. In particular, `EOF'
685 is returned if we run out of text; directives may return other tokens.
688 ## Skip initial comments. Otherwise we might end up with an empty
689 ## SequenceTemplate here.
690 while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
692 ## If we've run out of input, return `EOF' here. A line beginning `%%', or
693 ## not beginning `%', means we've found a chunk of text. Otherwise find
694 ## the right directive handler.
695 if ps.curr is None: return EOF
696 elif ps.curr.startswith('%'):
697 if ps.curr.startswith('%%'): return parse_text(ps)
698 for rx, func in DIRECT:
699 line = ps.curr[1:].strip()
704 ps.error("unrecognized directive")
706 return parse_text(ps)
708 def parse_templseq(ps, nestp):
710 Parse a sequence of templates from the ParseState PS.
712 Calls `parse_template' repeatedly. If NESTP is true, then an `END' token
713 (presumably from a directive handler) is permitted and halts parsing;
714 otherwise `END' signifies an error.
716 Returns a template object.
721 t = parse_template(ps)
724 else: ps.error("unexpected `end' directive")
726 if nestp: ps.error("unexpected end of file")
729 return SequenceTemplate(tt)
732 def dir_repeat(ps, m):
738 Iterate the body over the cartesian product of the relations mentioned
741 return RepeatTemplate(parse_templseq(ps, True))
745 """%end -- an end marker used to delimit chunks of template."""
748 def compile_template(file, text):
750 Compile TEXT into a template, attributing errors to FILE.
752 ps = ParseState(file, text)
753 t = parse_templseq(ps, False)
756 ###--------------------------------------------------------------------------
759 op = OP.OptionParser(
760 description = 'Generates files by filling in simple templates',
761 usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
762 version = 'Catacomb version @VERSION@')
def cb_gen(opt, optstr, arg, op):
  """
  Option callback for `-g': remember the template file and select `gen' mode.

  ARG is the option's argument; it is stored as `input' in the option values
  held by the parser OP, and `mode' there is set to `gen'.  OPT and OPTSTR
  are not used.
  """
  values = op.values
  values.mode = 'gen'
  values.input = arg
766 for short, long, kw in [
767 ('-l', '--list', dict(
768 action = 'store_const', const = 'list', dest = 'mode',
769 help = 'list filenames generated')),
770 ('-g', '--generate', dict(
771 action = 'callback', metavar = 'TEMPLATE',
772 callback = cb_gen, type = 'string',
773 help = 'generate file(s) from TEMPLATE file'))]:
774 op.add_option(short, long, **kw)
775 op.set_defaults(mode = 'what?')
776 opts, args = op.parse_args()
778 if len(args) < 1: op.error('missing FILE')
780 for rel in args[1:]: read_thing(rel)
781 filetempl = compile_template('<output>', filepat)
783 def filenames(filetempl):
785 Generate the filenames in the compiled filename template FILETEMPL.
788 rr = filetempl.relations()
790 if not len(r): return
794 filetempl.subst(out, cs)
795 yield out.getvalue(), cs
796 if not cs.step(): break
800 if opts.mode == 'list':
801 for file, cs in filenames(filetempl): print(file)
802 elif opts.mode == 'gen':
803 with open(opts.input) as f:
804 templ = RepeatTemplate(compile_template(opts.input, f.read()))
805 for file, cs in filenames(filetempl):
807 with open(new, 'w') as out:
811 die('What am I doing here?')
813 ###----- That's all, folks --------------------------------------------------