symm/multigen: Some UI improvements.
[catacomb] / symm / multigen
1 #! @PYTHON@
2 ###
3 ### Generate files by filling in simple templates
4 ###
5 ### (c) 2013 Straylight/Edgeware
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This file is part of Catacomb.
11 ###
12 ### Catacomb is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU Library General Public License as
14 ### published by the Free Software Foundation; either version 2 of the
15 ### License, or (at your option) any later version.
16 ###
17 ### Catacomb is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU Library General Public License for more details.
21 ###
22 ### You should have received a copy of the GNU Library General Public
23 ### License along with Catacomb; if not, write to the Free
24 ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 ### MA 02111-1307, USA.
26
27 from __future__ import with_statement
28
29 import itertools as IT
30 import optparse as OP
31 import os as OS
32 import re as RX
33 from cStringIO import StringIO
34 from sys import argv, exit, stderr
35
36 ###--------------------------------------------------------------------------
37 ### Utilities.
38
## The name this program was invoked under, used to prefix error messages.
QUIS = OS.path.basename(argv[0])

def die(msg):
  """
  Write MSG to standard error as a fatal error, and quit with status 1.

  The message is prefixed by the program name and followed by a newline.
  """
  report = '%s: %s\n' % (QUIS, msg)
  stderr.write(report)
  raise SystemExit(1)
45
def indexed(seq):
  """
  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.

  SEQ may be any iterable.  The result is a lazy iterator rather than a
  list.
  """
  ## The built-in `enumerate' does precisely this job; there's no need to zip
  ## a counter against the sequence by hand (and `itertools.izip' only
  ## exists in Python 2).
  return enumerate(seq)
51
52 ###--------------------------------------------------------------------------
53 ### Reading the input values.
54
## Map each column name to a pair (RELATION, INDEX): the Relation whose
## header mentions the column, and the column's position in that header.
COLMAP = dict()
57
class Cursor (object):
  """
  A Cursor object keeps track of an iteration through a Relation.

  At any time, the Cursor has a `current' row; the individual cells of this
  row may be retrieved using Python's standard indexing operator.  The `step'
  method advances to the next row (if there is one).  The `reset' method
  returns to row zero.

  Once `step' has run off the end of the Relation there is no current row
  (both the row and its index are None) until `reset' is called.
  """

  def __init__(me, rel):
    """
    Initialize a new Cursor object, tracking its way through a Relation REL.

    The new Cursor has row zero as its current row.  The REL must not be
    empty: it must support `len' and indexing by row number.
    """
    me._rel = rel
    me.reset()

  def step(me):
    """
    Advance the Cursor to the next row.

    Returns False if there is no next row; otherwise True.  After a False
    return, the Cursor must be `reset' before it is used again.
    """
    me._i += 1
    if me._i >= len(me._rel):
      me._i = me._row = None
      return False
    me._row = me._rel[me._i]
    return True

  def reset(me):
    """
    Reset the Cursor, so that row zero is current again.
    """
    me._i = 0
    me._row = me._rel[0]

  def __getitem__(me, i):
    """
    Return the item in column I of the Cursor's current row.

    The index must be acceptable to the underlying row object, but otherwise
    the Cursor imposes no restrictions.  Indices need not be numeric, for
    example.
    """
    return me._row[i]

  def __repr__(me):
    """
    Return a text description of the Cursor, for diagnostic use.
    """
    ## The row index is None once the Cursor has been exhausted, and `%d'
    ## would raise TypeError then; `%r' prints integers identically and
    ## copes with None too.
    return '#<Cursor %r[%r] = %r>' % (me._rel, me._i, me._row)
113
class CursorSet (object):
  """
  A CursorSet iterates over the cartesian product of a number of Relations.

  More precisely: it maintains a stack, each level of which tracks a number
  of Relations.  More Relations can be pushed onto this stack with the `push'
  method, and removed with `pop'.  The `step' method advances through the
  cartesian product of the Relations in the top level of the stack -- the
  `active' Relations.  Columns from the current rows of all of the currently
  known Relations -- whether active or not -- can be extracted using `get'.
  """

  def __init__(me):
    """
    Initialize a new CursorSet object.

    A new CursorSet has an empty stack, and no active Relations: `push' must
    be called before `step'.
    """
    me._map = {}                        # Relation -> its Cursor
    me._stack = []                      # saved (active cursors, relations)
    me._act = None                      # list of currently active Cursors

  def push(me, rels):
    """
    Push the new Relations RELS onto the stack and start iterating.

    The currently active Relations are pushed down.  Those Relations which are
    not already known to the CursorSet become the newly active collection.
    (Relations which are already known are simply ignored.)

    Iteration traverses the newly active Relations in odometer fashion: the
    first one yielded by RELS advances most rapidly, the last most slowly.
    """
    cc = []
    rr = []
    for r in rels:
      if r in me._map: continue
      c = me._map[r] = Cursor(r)
      rr.append(r)
      cc.append(c)
    me._stack.append((me._act, rr))
    me._act = cc

  def step(me):
    """
    Advance the CursorSet through the currently active Relations.

    Return False if the active Relations have now been exhausted; otherwise
    return True.  On a False return, every active Cursor has been reset to
    its first row.
    """
    for c in me._act:
      ## If this Cursor can move on, we're done.  Otherwise it has wrapped
      ## around: put it back at its first row and carry into the next one.
      if c.step(): return True
      c.reset()
    return False

  def pop(me):
    """
    Pop the active Relations.

    Return to iterating over the previously active collection, and forget
    about the Relations which were introduced by the matching `push'.
    """
    me._act, rels = me._stack.pop()
    for r in rels: del me._map[r]

  def get(me, rel, i):
    """
    Return the item with index I in the current row of Relation REL.

    Raises KeyError if REL isn't currently known to the CursorSet.
    """
    return me._map[rel][i]
185
class Relation (object):
  """
  A Relation is a table of data.

  It has a `header' -- a sequence of column names, as strings -- and a list
  of rows, each of which has exactly as many items as the header.

  Cursors and CursorSets are used to iterate over Relations.
  """

  def __init__(me, head):
    """
    Make a new Relation with header HEAD and no rows.

    As a side effect, each column name in HEAD is entered into the `COLMAP'
    dictionary, mapped to this Relation and the column's index.
    """
    me._rows = []
    me._head = head
    for idx, name in enumerate(head):
      COLMAP[name] = (me, idx)

  def addrow(me, row):
    """
    Append ROW to the Relation.

    It's a fatal error if ROW doesn't have one entry per header column.
    """
    want, have = len(me._head), len(row)
    if have != want:
      rowdesc = ', '.join(row)
      headdesc = ', '.join(me._head)
      die("mismatch: row `%s' doesn't match heading `%s'" %
          (rowdesc, headdesc))
    me._rows.append(row)

  def __len__(me):
    """Answer how many rows the Relation holds."""
    rows = me._rows
    return len(rows)

  def __getitem__(me, i):
    """Answer the row at index I."""
    rows = me._rows
    return rows[i]

  def __repr__(me):
    """Describe the Relation, by its header, for diagnostic purposes."""
    return '#<Relation %r>' % (me._head,)
230
def read_immediate(word):
  """
  Return a Relation constructed by parsing WORD.

  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
  comma-separated lists of strings which will form the relation's header and
  rows respectively.  There is no way to include an item which contains a
  comma or whitespace.

  It is a fatal error if WORD contains no `=', or if some ROW has the wrong
  number of items.
  """
  ## Complain properly about a missing `=', rather than letting the tuple
  ## unpacking below explode with a traceback.
  if '=' not in word:
    die("bad relation `%s': expected `HEAD=ROW ROW ...'" % word)
  head, rows = word.split('=', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  for row in rows.split():
    rel.addrow([c.strip() for c in row.split(',')])
  return rel

def read_file(spec):
  """
  Return a Relation constructed from a file, according to SPEC.

  The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
  comma-separated list of strings to form the relation's header.  Each line
  from the file which is neither empty nor begins with `#' is split into
  whitespace-separated words to form a row in the relation.  There is no way
  to include an item which contains whitespace.

  It is a fatal error if SPEC contains no `:', or if some line has the wrong
  number of items.  Failure to open or read FILE raises the usual exception.
  """
  if ':' not in spec:
    die("bad relation `@%s': expected `@FILE:HEAD'" % spec)
  path, head = spec.split(':', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  with open(path) as f:
    for line in f:
      line = line.strip()
      if not line or line.startswith('#'): continue
      rel.addrow(line.split())
  return rel

def read_thing(spec):
  """
  Return a relation constructed from SPEC.

  If SPEC begins with `@' then read the relation from a file (see
  `read_file'); otherwise interpret it as immediate data (see
  `read_immediate').
  """
  if spec.startswith('@'): return read_file(spec[1:])
  else: return read_immediate(spec)
272
273 ###--------------------------------------------------------------------------
274 ### Template structure.
275
class BasicTemplate (object):
  """
  Common ancestor of all template objects.

  This class provides nothing itself.  Every template class is expected to
  implement two methods:

    relations()         Return the set of Relations which the template's
                        substitutions mention at top level.

    subst(OUT, CS)      Expand the template, writing the result to the
                        stream OUT.  The CS is the CursorSet which tracks
                        the current state of the iteration.
  """
290
class LiteralTemplate (BasicTemplate):
  """
  A LiteralTemplate writes out a fixed piece of text, unchanged.
  """

  def __init__(me, text, **kw):
    """
    Set up a LiteralTemplate which will emit TEXT.

    Other keyword arguments are handed on to the superclass.
    """
    super(LiteralTemplate, me).__init__(**kw)
    me._text = text

  def relations(me):
    """There are no substitutions here, so answer the empty set."""
    none = set()
    return none

  def subst(me, out, cs):
    """Write the fixed text to OUT; the CursorSet CS is not consulted."""
    text = me._text
    out.write(text)

  def __repr__(me):
    """Describe the template, for diagnostic purposes."""
    return '#<LiteralTemplate %r>' % (me._text,)
313
class TagTemplate (BasicTemplate):
  """
  A TagTemplate stands for a substitution tag.

  When expanded, it picks one item out of the current row of a relation,
  optionally transforms it, and writes out the result.
  """

  def __init__(me, rel, i, op, **kw):
    """
    Set up a TagTemplate.

    The item is taken from column I of relation REL.  If OP is not None,
    then it is a function which is applied to the item to produce the text
    actually written.  Other keyword arguments are handed on to the
    superclass.
    """
    super(TagTemplate, me).__init__(**kw)
    me._rel, me._i, me._op = rel, i, op

  def relations(me):
    """Answer the singleton set holding the relation this tag refers to."""
    rels = set()
    rels.add(me._rel)
    return rels

  def subst(me, out, cs):
    """
    Fetch the item from the current row, as tracked by the CursorSet CS,
    transform it if necessary, and write it to OUT.
    """
    item = cs.get(me._rel, me._i)
    xform = me._op
    out.write(item if xform is None else xform(item))

  def __repr__(me):
    """Describe the template, by its column name, for diagnostic purposes."""
    colname = me._rel._head[me._i]
    return '#<TagTemplate %s>' % colname
350
class SequenceTemplate (BasicTemplate):
  """
  A SequenceTemplate concatenates a number of other templates.
  """

  def __new__(cls, seq, **kw):
    """
    Construct a template from a sequence SEQ of other templates.

    If SEQ is a singleton (which it often is) then return it directly;
    otherwise construct a SequenceTemplate.
    """
    if len(seq) == 1:
      return seq[0]
    else:
      ## Don't hand our arguments on to `object.__new__': it accepts none,
      ## which is a deprecation warning in later Python 2 and an outright
      ## TypeError in Python 3.  The arguments still reach `__init__'.
      return super(SequenceTemplate, cls).__new__(cls)

  def __init__(me, seq, **kw):
    """
    Initialize a new SequenceTemplate object from SEQ.

    The sequence is flattened out: if SEQ contains SequenceTemplates then we
    use their children directly, so that we don't have a useless tree.

    (If `__new__' returned an existing SequenceTemplate, because SEQ was a
    singleton containing one, then Python calls this method on it again;
    that just rebuilds the same list of children.)
    """
    super(SequenceTemplate, me).__init__(**kw)
    tt = []
    cls = type(me)
    for t in seq:
      if isinstance(t, cls): tt += t._seq
      else: tt.append(t)
    me._seq = tt

  def relations(me):
    """
    The relations of a SequenceTemplate are the union of the relations of its
    children.
    """
    rr = set()
    for t in me._seq: rr.update(t.relations())
    return rr

  def subst(me, out, cs):
    """
    The output of a SequenceTemplate is the concatenation of the expansions
    of its children, in order.
    """
    for t in me._seq: t.subst(out, cs)

  def __repr__(me):
    """Describe the template, by its children, for diagnostic purposes."""
    return '#<SequenceTemplate %r>' % me._seq
401
class RepeatTemplate (BasicTemplate):
  """
  A RepeatTemplate iterates its body over a number of relations.
  """

  def __init__(me, sub, **kw):
    """
    Initialize a new RepeatTemplate, given a template SUB to act as its body.

    Other keyword arguments are passed on to the superclass, in the same way
    as the other template classes do.
    """
    super(RepeatTemplate, me).__init__(**kw)
    me._sub = sub

  def relations(me):
    """
    A RepeatTemplate hides the relations of its body.
    """
    return set()

  def subst(me, out, cs):
    """
    Substitute a RepeatTemplate, by iterating over the relations mentioned in
    its body template.

    The body is expanded once for each combination of rows of those relations
    which the CursorSet CS isn't already iterating over.  If any of the
    body's relations is empty then nothing at all is written.
    """
    rr = me._sub.relations()
    for r in rr:
      if len(r) == 0: return
    cs.push(rr)
    ## Make sure that the CursorSet is restored to its previous state even
    ## if expanding the body fails part-way through.
    try:
      while True:
        me._sub.subst(out, cs)
        if not cs.step(): break
    finally:
      cs.pop()

  def __repr__(me):
    """Describe the template, by its body, for diagnostic purposes."""
    return '#<RepeatTemplate %r>' % me._sub
435
436 ###--------------------------------------------------------------------------
437 ### Some slightly cheesy parsing machinery.
438
class ParseState (object):
  """
  A ParseState object keeps track of a parser's position in a file.

  The `curr' slot contains the current line under consideration, complete
  with its line terminator, or None if the input has been exhausted.
  """

  def __init__(me, file, text):
    """
    Initialize a ParseState object.

    The FILE is a string naming the source file, for use in error messages;
    the TEXT is a single string holding the entire input, which is split
    into lines here.  The first line, if there is one, becomes current
    immediately.
    """
    me._file = file
    me._i = 0
    me._it = iter(text.splitlines(True))
    me.step()

  def step(me):
    """
    Advance the ParseState to the next line.

    Sets `curr' to the next line, or to None if the input is exhausted.  The
    line number used in error messages is only advanced when there actually
    is another line.
    """
    ## Take at most one item from the iterator.  Doing this with a `for' loop
    ## avoids depending on the spelling of the iterator protocol, which
    ## differs between Python versions.
    for line in me._it:
      me.curr = line
      me._i += 1
      return
    me.curr = None

  def error(me, msg):
    """
    Report a fatal error during parsing, attributing it to the current line.
    """
    die('%s:%d: %s' % (me._file, me._i, msg))
473
class token (object):
  """
  A token is a marker object: all that matters about it is which one it is.
  """

  def __init__(me, name):
    """Set up a token called NAME; the name is only used by `repr'."""
    me._name = name

  def __repr__(me):
    """Describe the token, by its name, for diagnostic purposes."""
    return '#<%s>' % (me._name,)
485
## Distinguished tokens, recognized by identity during parsing: `EOF' marks
## the end of the input, and `END' marks an `%end' directive.
EOF, END = token('eof'), token('end')
489
## Patterns for the two kinds of substitution tag: the simple `@COLUMN', and
## the complex `@{COLUMN:OP:...}'.  Group 1 is the column name; in the
## complex form, group 2 is the (possibly empty) run of `:OP' suffixes.
## Whitespace within the patterns is for legibility only.
R_SIMPLETAG = RX.compile(r'@ (\w+)', flags = RX.VERBOSE)
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', flags = RX.VERBOSE)
493
## A dictionary mapping operation names to functions which implement them.
OPMAP = {}

def defop(func):
  """
  Decorator for substitution operator functions.

  Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
  name, removing a prefix `op_' if there is one.  The function itself is
  returned unchanged.

  An operator function is given the raw value as an argument and should
  return the transformed value.
  """
  ## Use `__name__' rather than `func_name': the former is understood by
  ## every version of Python, while the latter is peculiar to Python 2.
  name = func.__name__
  if name.startswith('op_'): name = name[3:]
  OPMAP[name] = func
  return func
511
@defop
def op_u(val):
  """@{COLUMN:u} -- the item, converted to upper case."""
  shouted = val.upper()
  return shouted
516
@defop
def op_l(val):
  """@{COLUMN:l} -- the item in lower case."""
  return val.lower()
521
## Matches a maximal run of characters which can't appear in an identifier.
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')

@defop
def op_c(val):
  """
  @{COLUMN:c} -- the item, with each run of characters other than letters,
  digits and underscores replaced by a single `_'.
  """
  cleaned = R_NOTIDENT.sub('_', val)
  return cleaned
529
530 def _pairify(val):
531 """
532 Split VAL into two, at an `=' sign.
533
534 If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
535 otherwise return (VAL, VAL).
536 """
537 c = val.find('=')
538 if c >= 0: return val[:c], val[c + 1:]
539 else: return val, val
540
@defop
def op_left(val):
  """@{COLUMN:left} -- the part of the item before its first `=' sign."""
  left, _ = _pairify(val)
  return left
@defop
def op_right(val):
  """@{COLUMN:right} -- the right-hand side of the item."""
  return _pairify(val)[1]
549
def parse_text(ps):
  """
  Parse a run of ordinary text from the ParseState PS; return a template.

  Parsing stops at the end of the input, or at a line which looks like a
  directive; the stopping line is left as the current line for the caller to
  deal with.

  Within the text, tags of the form `@COLUMN' or `@{COLUMN:OPERATOR:...}'
  are picked out and turned into TagTemplates; `@@' stands for a literal
  `@'.  Lines beginning `%#' are comments and are dropped.  Lines beginning
  `%%' lose their first `%' and are then treated as ordinary text (so they
  may contain tags).  Any other line beginning with `%' is a directive.

  An invalid tag, an unknown column, or an unknown operator is a fatal
  error.
  """

  templates = []                        # templates collected so far
  pending = []                          # pieces of literal text not yet used

  def spill():
    ## Turn the pending literal text, if any, into a LiteralTemplate.
    text = ''.join(pending)
    if text: templates.append(LiteralTemplate(text))
    del pending[:]

  def chain(inner, outer):
    ## Answer a function which applies INNER, and then OUTER to the result.
    return lambda x: outer(inner(x))

  ## Work through the input a line at a time.
  while ps.curr is not None:
    line = ps.curr

    ## Sort out lines beginning with `%'.
    if line.startswith('%'):
      if line.startswith('%#'):
        ps.step()
        continue
      if not line.startswith('%%'): break
      line = line[1:]

    ## Scan along the line, from one `@' sign to the next.
    pos = 0
    while True:
      at = line.find('@', pos)
      if at < 0: break

      ## Keep the literal text leading up to the `@'.
      pending.append(line[pos:at])

      ## A doubled `@' is just an escaped literal one.
      if line.startswith('@@', at):
        pending.append('@')
        pos = at + 2
        continue

      ## Otherwise it must introduce a tag.  Pick out the column, and any
      ## operators.
      m = R_SIMPLETAG.match(line, at) or R_COMPLEXTAG.match(line, at)
      if not m: ps.error('invalid tag')
      col = m.group(1)
      try: rel, colidx = COLMAP[col]
      except KeyError: ps.error("unknown column `%s'" % col)
      ops = m.lastindex >= 2 and m.group(2)

      ## Compose the operators, so that they're applied left to right.
      wholeop = None
      if ops:
        for opname in ops[1:].split(':'):
          try: op = OPMAP[opname]
          except KeyError: ps.error("unknown operation `%s'" % opname)
          if wholeop is None: wholeop = op
          else: wholeop = chain(wholeop, op)

      ## The literal text so far comes first, and then the tag itself.
      spill()
      templates.append(TagTemplate(rel, colidx, wholeop))

      ## Pick up again just after the tag.
      pos = m.end()

    ## No more tags here: keep the rest of the line and fetch the next one.
    pending.append(line[pos:])
    ps.step()

  ## All done.  Deal with any remaining literal text and bundle up the
  ## result.
  spill()
  return SequenceTemplate(templates)
642
## A list of (REGEXP, FUNC) pairs, associating compiled regular expressions
## with directive-processing functions.
DIRECT = []

def direct(rx):
  """
  Answer a decorator which registers a template-file directive handler.

  The pattern RX is compiled (in verbose mode) and appended to `DIRECT',
  paired with the decorated function; the function itself is left
  unchanged.  A handler is called as FUNC(PS, M), where PS is the
  ParseState and M is the match object produced by matching RX against the
  text of the directive.
  """
  pattern = RX.compile(rx, RX.VERBOSE)
  def register(func):
    entry = (pattern, func)
    DIRECT.append(entry)
    return func
  return register
659
def parse_template(ps):
  """
  Parse a single template from the ParseState PS.

  A single template is either a chunk of text (parsed by `parse_text') or a
  directive (handled by the appropriate function in `DIRECT').

  Returns either a template object, or a special token.  In particular, `EOF'
  is returned if we run out of text; directives may return other tokens.
  A directive line which no handler recognizes is a fatal error.
  """

  ## Skip initial comments.  Otherwise we might end up with an empty
  ## SequenceTemplate here.
  while ps.curr is not None and ps.curr.startswith('%#'): ps.step()

  ## If we've run out of input, return `EOF' here.  A line beginning `%%', or
  ## not beginning `%', means we've found a chunk of text.
  if ps.curr is None: return EOF
  if not ps.curr.startswith('%') or ps.curr.startswith('%%'):
    return parse_text(ps)

  ## Otherwise find the right directive handler.  The directive text is the
  ## same whichever handler we try, so work it out just once.  The first
  ## handler whose pattern matches wins; it is called with the line after
  ## the directive as the current one.
  line = ps.curr[1:].strip()
  for rx, func in DIRECT:
    m = rx.match(line)
    if m:
      ps.step()
      return func(ps, m)
  ps.error("unrecognized directive")
690
def parse_templseq(ps, nestp):
  """
  Parse templates from the ParseState PS until there are no more.

  This calls `parse_template' over and over, collecting the results.  If
  NESTP is true then the sequence must be finished by an `END' token
  (presumably from a directive handler), and running out of input is a fatal
  error.  If NESTP is false, it's the other way around: the sequence is
  finished by the end of the input, and `END' is a fatal error.

  Returns a template object.
  """

  parts = []
  while True:
    t = parse_template(ps)
    if t is END:
      if not nestp: ps.error("unexpected `end' directive")
      break
    if t is EOF:
      if nestp: ps.error("unexpected end of file")
      break
    parts.append(t)
  return SequenceTemplate(parts)
713
@direct(r'repeat')
def dir_repeat(ps, m):
  """
  %repeat
  BODY
  %end

  Expand the BODY once for each element of the cartesian product of the
  relations which it mentions.
  """
  body = parse_templseq(ps, True)
  return RepeatTemplate(body)
725
@direct(r'end')
def dir_end(ps, m):
  """%end -- an end marker, used to delimit chunks of template."""
  marker = END
  return marker
730
def compile_template(file, text):
  """
  Parse the string TEXT as a complete template, and return the result.

  The name FILE is used to say where errors were found.
  """
  return parse_templseq(ParseState(file, text), False)
738
739 ###--------------------------------------------------------------------------
740 ### Main code.
741
## Set up the command-line parser.
op = OP.OptionParser(
  description = 'Generates files by filling in simple templates',
  usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
  version = 'Catacomb version @VERSION@')

def cb_gen(opt, optstr, arg, op):
  ## Callback for `-g': note the template file name, and select `gen' mode.
  op.values.input = arg
  op.values.mode = 'gen'

op.add_option('-l', '--list',
              action = 'store_const', const = 'list', dest = 'mode',
              help = 'list filenames generated')
op.add_option('-g', '--generate',
              action = 'callback', metavar = 'TEMPLATE',
              callback = cb_gen, type = 'string',
              help = 'generate file(s) from TEMPLATE file')
op.set_defaults(mode = 'what?')
opts, args = op.parse_args()

## The first positional argument is the output filename pattern; the others
## describe relations.  The relations must be read before the pattern is
## compiled, since compiling it looks up the column names.
if not args: op.error('missing FILE')
filepat = args[0]
for rel in args[1:]: read_thing(rel)
filetempl = compile_template('<output>', filepat)
765
def filenames(filetempl):
  """
  Generate the filenames described by the compiled template FILETEMPL.

  Yields pairs (NAME, CS), one for each combination of rows of the relations
  mentioned in FILETEMPL: NAME is the expanded filename, and CS is a
  CursorSet positioned at the combination which produced it.  Nothing is
  yielded if any of those relations is empty.
  """
  cs = CursorSet()
  rels = filetempl.relations()
  for r in rels:
    if len(r) == 0: return
  cs.push(rels)
  more = True
  while more:
    buf = StringIO()
    filetempl.subst(buf, cs)
    yield buf.getvalue(), cs
    more = cs.step()
  cs.pop()
781
## Main dispatch: do whatever the command line asked for.
if opts.mode == 'list':
  ## Just print the names of the files which would be written.
  for name, _ in filenames(filetempl): print(name)
elif opts.mode == 'gen':
  with open(opts.input) as f: body = f.read()
  templ = RepeatTemplate(compile_template(opts.input, body))
  for name, cs in filenames(filetempl):
    ## Write the output under a temporary name, and rename it into place
    ## once it's complete.
    tmp = name + '.new'
    with open(tmp, 'w') as out: templ.subst(out, cs)
    OS.rename(tmp, name)
else:
  die('What am I doing here?')
795
796 ###----- That's all, folks --------------------------------------------------