| 1 | #! @PYTHON@ |
| 2 | ### |
| 3 | ### Generate files by filling in simple templates |
| 4 | ### |
| 5 | ### (c) 2013 Straylight/Edgeware |
| 6 | ### |
| 7 | |
| 8 | ###----- Licensing notice --------------------------------------------------- |
| 9 | ### |
| 10 | ### This file is part of Catacomb. |
| 11 | ### |
| 12 | ### Catacomb is free software; you can redistribute it and/or modify |
| 13 | ### it under the terms of the GNU Library General Public License as |
| 14 | ### published by the Free Software Foundation; either version 2 of the |
| 15 | ### License, or (at your option) any later version. |
| 16 | ### |
| 17 | ### Catacomb is distributed in the hope that it will be useful, |
| 18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ### GNU Library General Public License for more details. |
| 21 | ### |
| 22 | ### You should have received a copy of the GNU Library General Public |
| 23 | ### License along with Catacomb; if not, write to the Free |
| 24 | ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| 25 | ### MA 02111-1307, USA. |
| 26 | |
| 27 | from __future__ import with_statement |
| 28 | |
| 29 | import itertools as IT |
| 30 | import optparse as OP |
| 31 | import os as OS |
| 32 | import re as RX |
| 33 | from cStringIO import StringIO |
| 34 | from sys import argv, exit, stderr |
| 35 | |
| 36 | ###-------------------------------------------------------------------------- |
| 37 | ### Utilities. |
| 38 | |
## The name this program was invoked under; it prefixes every diagnostic.
QUIS = OS.path.basename(argv[0])

def die(msg):
  """
  Write MSG to standard error, prefixed by the program name, and exit.

  The process exits with status 1, so this function never returns.
  """
  report = '%s: %s\n' % (QUIS, msg)
  stderr.write(report)
  exit(1)
| 45 | |
def indexed(seq):
  """
  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.

  This is exactly what the builtin `enumerate' does, so use it rather than
  zipping against a counter by hand; the name is kept for existing callers.
  The result is a one-shot iterator, not a list.
  """
  return enumerate(seq)
| 51 | |
| 52 | ###-------------------------------------------------------------------------- |
| 53 | ### Reading the input values. |
| 54 | |
## Map column names to (Relation, column-index) pairs.  Entries are added
## when a Relation is constructed, and looked up when template tags are
## parsed.
COLMAP = {}
| 57 | |
class Cursor (object):
  """
  A Cursor object keeps track of an iteration through a Relation.

  At any time, the Cursor has a `current' row; the individual cells of this
  row may be retrieved using Python's standard indexing operator.  The `step'
  method advances to the next row (if there is one).  The `reset' method
  returns to row zero.

  Once `step' has run off the end of the Relation there is no current row
  (both the index and the row are None) until `reset' is called.
  """

  def __init__(me, rel):
    """
    Initialize a new Cursor object, tracking its way through a Relation REL.

    The new Cursor has row zero as its current row.  The REL must not be
    empty, since row zero is fetched immediately.
    """
    me._rel = rel
    me.reset()

  def step(me):
    """
    Advance the Cursor to the next row.

    Returns False if there is no next row; otherwise True.  After a False
    return the Cursor has no current row, and must be `reset' before it is
    used again.
    """
    me._i += 1
    if me._i >= len(me._rel):
      me._i = me._row = None
      return False
    me._row = me._rel[me._i]
    return True

  def reset(me):
    """
    Reset the Cursor, so that row zero is current again.
    """
    me._i = 0
    me._row = me._rel[0]

  def __getitem__(me, i):
    """
    Return the item in column I of the Cursor's current row.

    The index must be acceptable to the underlying row object, but otherwise
    the Cursor imposes no restrictions.  Indices need not be numeric, for
    example.
    """
    return me._row[i]

  def __repr__(me):
    """
    Return a text description of the Cursor, for diagnostic use.
    """
    ## Format the index with `%r' rather than `%d': it is None once the
    ## Cursor has been exhausted, and `%d' would raise TypeError just when a
    ## diagnostic is most likely to be wanted.
    return '#<Cursor %r[%r] = %r>' % (me._rel, me._i, me._row)
| 113 | |
class CursorSet (object):
  """
  A CursorSet iterates over the cartesian product of a number of Relations.

  More precisely: it maintains a stack, each level of which tracks a number
  of Relations.  More Relations can be pushed onto this stack with the `push'
  method, and removed with `pop'.  The `step' method advances through the
  cartesian product of the Relations in the top level of the stack -- the
  `active' Relations.  Columns from the current rows of all of the currently
  known Relations -- whether active or not -- can be extracted using `get'.
  """

  def __init__(me):
    """
    Initialize a new CursorSet object.

    A new CursorSet has an empty stack.
    """
    me._map = {}                        # Relation -> its Cursor
    me._stack = []                      # saved (active-cursors, relations)
    me._act = None                      # Cursors at the top level

  def push(me, rels):
    """
    Push the new Relations RELS onto the stack and start iterating.

    The currently active Relations are pushed down.  Those Relations which are
    not already known to the CursorSet become the newly active collection.
    (Relations which are already known are simply ignored.)

    Each new Relation must be nonempty, because a Cursor is positioned on its
    first row straight away.  Iteration works like an odometer whose fastest
    digit is the first new Relation in RELS: each later Relation advances
    only when all of those before it have wrapped around.
    """
    cc = []
    rr = []
    for r in rels:
      if r in me._map: continue
      c = me._map[r] = Cursor(r)
      rr.append(r)
      cc.append(c)
    me._stack.append((me._act, rr))
    me._act = cc

  def step(me):
    """
    Advance the CursorSet through the currently active Relations.

    Return False if the active Relations have now been exhausted; otherwise
    return True.  When False is returned, every active Cursor has been reset
    to its first row.
    """
    for c in me._act:
      if c.step(): return True
      ## This Cursor ran off the end: wind it back to the start and carry
      ## into the next one.
      c.reset()
    return False

  def pop(me):
    """
    Pop the active Relations.

    Return to iterating over the previously active collection, and forget
    the Relations which were introduced by the matching `push'.
    """
    me._act, rels = me._stack.pop()
    for r in rels: del me._map[r]

  def get(me, rel, i):
    """
    Return the item with index I in the current row of Relation REL.

    REL must be known to the CursorSet, i.e., pushed and not yet popped.
    """
    return me._map[rel][i]
| 185 | |
class Relation (object):
  """
  A Relation keeps track of a table of data.

  A Relation consists of a `header', which is a sequence of string names,
  and a rectangular array of data, each row of which has the same number of
  items as the header.

  Relations can be iterated over using Cursors and CursorSets.
  """

  def __init__(me, head):
    """
    Initialize a new, empty Relation with header HEAD.

    The `COLMAP' dictionary is updated to map the names in the header to this
    Relation and its column indices.  A name which is already present in
    `COLMAP' is silently rebound to the new Relation.
    """
    me._head = head
    me._rows = []
    for i, c in enumerate(head): COLMAP[c] = me, i

  def addrow(me, row):
    """
    Add a ROW to the Relation.

    The new row must have the correct number of entries; if it doesn't then
    a fatal error is reported.
    """
    if len(row) != len(me._head):
      die("mismatch: row `%s' doesn't match heading `%s'" %
          (', '.join(row), ', '.join(me._head)))
    me._rows.append(row)

  def __len__(me):
    """Return the number of rows in the Relation."""
    return len(me._rows)

  def __getitem__(me, i):
    """Return the Ith row of the Relation."""
    return me._rows[i]

  def __repr__(me):
    """Return a textual description of the Relation, for diagnostic use."""
    return '#<Relation %r>' % me._head
| 230 | |
def read_immediate(word):
  """
  Return a Relation constructed by parsing WORD.

  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
  comma-separated lists of strings which will form the relation's header and
  rows respectively.  There is no way to include an item which contains a
  comma or whitespace.

  Constructing the Relation registers its column names as a side effect; the
  Relation is also returned, as promised, for callers which want it.
  """
  head, rows = word.split('=', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  for row in rows.split(): rel.addrow([c.strip() for c in row.split(',')])
  return rel
| 243 | |
def read_file(spec):
  """
  Return a Relation constructed from a file, according to SPEC.

  The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
  comma-separated list of strings to form the relation's header.  Each line
  from the file which is neither empty nor begins with `#' is split into
  whitespace-separated words to form a row in the relation.  There is no way
  to include an item which contains whitespace.

  Constructing the Relation registers its column names as a side effect; the
  Relation is also returned, as promised, for callers which want it.
  """
  ## Don't call the local `file': that would shadow the builtin.
  path, head = spec.split(':', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  with open(path) as f:
    for line in f:
      line = line.strip()
      if line.startswith('#') or line == '': continue
      rel.addrow(line.split())
  return rel
| 261 | |
def read_thing(spec):
  """
  Return a relation constructed from SPEC.

  If SPEC begins with `@' then read the relation from a file (see
  `read_file'); otherwise interpret it as immediate data (see
  `read_immediate').  The result of the chosen reader is passed back to the
  caller.
  """
  if spec.startswith('@'): return read_file(spec[1:])
  else: return read_immediate(spec)
| 272 | |
| 273 | ###-------------------------------------------------------------------------- |
| 274 | ### Template structure. |
| 275 | |
class BasicTemplate (object):
  """
  Common ancestor of all template objects.

  This class defines no behaviour of its own.  It exists to document the
  protocol which every concrete template is expected to implement:

  relations()           Return the set of Relations referred to by
                        substitutions at the top level of the template.

  subst(OUT, CS)        Expand the template, writing the result to the
                        stream OUT.  CS is the CursorSet holding the
                        current iteration state.
  """
| 290 | |
class LiteralTemplate (BasicTemplate):
  """
  A template which expands to a constant piece of text.
  """

  def __init__(me, text, **kw):
    """
    Set up a LiteralTemplate which will emit TEXT verbatim.

    Any further keyword arguments are handed on to the superclass.
    """
    me._text = text
    super(LiteralTemplate, me).__init__(**kw)

  def relations(me):
    """Literal text mentions no relations, so the answer is the empty set."""
    none = set()
    return none

  def subst(me, out, cs):
    """Write the fixed text to OUT; the CursorSet CS is not consulted."""
    text = me._text
    out.write(text)

  def __repr__(me):
    """Return a description of the template, for diagnostic use."""
    fmt = '#<LiteralTemplate %r>'
    return fmt % me._text
| 313 | |
class TagTemplate (BasicTemplate):
  """
  A template which expands a single substitution tag.

  The expansion is one cell of the current row of some relation, optionally
  passed through a transformation before being written out.
  """

  def __init__(me, rel, i, op, **kw):
    """
    Set up a TagTemplate.

    REL is the relation to read from and I is the index of the column
    wanted.  OP is a function applied to the cell's value before it is
    written, or None if the value is to be written unchanged.  Any further
    keyword arguments are handed on to the superclass.
    """
    super(TagTemplate, me).__init__(**kw)
    me._rel, me._i, me._op = rel, i, op

  def relations(me):
    """Return a set containing just the one relation this tag reads."""
    rr = set()
    rr.add(me._rel)
    return rr

  def subst(me, out, cs):
    """
    Fetch the cell from the current row of the relation, as tracked by the
    CursorSet CS, transform it if required, and write it to OUT.
    """
    raw = cs.get(me._rel, me._i)
    if me._op is None: out.write(raw)
    else: out.write(me._op(raw))

  def __repr__(me):
    """Return a description naming the column, for diagnostic use."""
    column = me._rel._head[me._i]
    return '#<TagTemplate %s>' % column
| 350 | |
class SequenceTemplate (BasicTemplate):
  """
  A SequenceTemplate concatenates a number of other templates.
  """

  def __new__(cls, seq, **kw):
    """
    Construct a template from a sequence SEQ of other templates.

    If SEQ is a singleton (which it often is) then return it directly;
    otherwise construct a SequenceTemplate.

    If the singleton happens to be a SequenceTemplate itself, Python will
    run `__init__' on it again with SEQ as argument.  This is harmless:
    flattening SEQ just reproduces the object's existing children.
    """
    if len(seq) == 1:
      return seq[0]
    else:
      ## Pass only the class upwards.  The constructor arguments are for
      ## `__init__'; `object.__new__' accepts none, and objects to being
      ## given any (a deprecation warning in Python 2.6 and later, and a
      ## TypeError in Python 3).
      return super(SequenceTemplate, cls).__new__(cls)

  def __init__(me, seq, **kw):
    """
    Initialize a new SequenceTemplate object from SEQ.

    The sequence is flattened out: if SEQ contains SequenceTemplates then we
    use their children directly, so that we don't have a useless tree.  Any
    further keyword arguments are handed on to the superclass.
    """
    super(SequenceTemplate, me).__init__(**kw)
    tt = []
    cls = type(me)
    for t in seq:
      if isinstance(t, cls): tt += t._seq
      else: tt.append(t)
    me._seq = tt

  def relations(me):
    """
    The relations of a SequenceTemplate are the union of the relations of its
    children.
    """
    rr = set()
    for t in me._seq: rr.update(t.relations())
    return rr

  def subst(me, out, cs):
    """
    The output of a SequenceTemplate is the concatenation of the expansions
    of its children, in order.
    """
    for t in me._seq: t.subst(out, cs)

  def __repr__(me):
    """Return a description listing the children, for diagnostic use."""
    return '#<SequenceTemplate %r>' % me._seq
| 401 | |
class RepeatTemplate (BasicTemplate):
  """
  A RepeatTemplate iterates its body over a number of relations.
  """

  def __init__(me, sub, **kw):
    """
    Initialize a new RepeatTemplate, given a template SUB to act as its body.

    Any further keyword arguments are handed on to the superclass, in the
    same way as the other template classes do.
    """
    super(RepeatTemplate, me).__init__(**kw)
    me._sub = sub

  def relations(me):
    """
    A RepeatTemplate hides the relations of its body: it iterates over them
    itself, so they are of no concern to an enclosing template.
    """
    return set()

  def subst(me, out, cs):
    """
    Substitute a RepeatTemplate, by iterating over the relations mentioned in
    its body template.

    The body is expanded once for each element of the cartesian product of
    those relations which the CursorSet CS isn't already iterating over.  If
    any relation mentioned in the body is empty then nothing is written.
    """
    rr = me._sub.relations()
    for r in rr:
      if len(r) == 0: return
    cs.push(rr)
    ## The body is expanded before the first `step': a newly pushed level
    ## starts out positioned on its first combination of rows.
    while True:
      me._sub.subst(out, cs)
      if not cs.step(): break
    cs.pop()

  def __repr__(me):
    """Return a description showing the body, for diagnostic use."""
    return '#<RepeatTemplate %r>' % me._sub
| 435 | |
| 436 | ###-------------------------------------------------------------------------- |
| 437 | ### Some slightly cheesy parsing machinery. |
| 438 | |
class ParseState (object):
  """
  A ParseState object keeps track of a parser's position in a file.

  The `curr' slot contains the current line under consideration, including
  its line terminator, or None once the input has been exhausted.
  """

  def __init__(me, file, text):
    """
    Initialize a ParseState object.

    The FILE is a string naming the source file, for use in error messages;
    the TEXT is a string holding the entire contents to be parsed.  The
    first line (if there is one) is made current immediately.
    """
    me._file = file
    me._i = 0                           # number of the current line
    me._it = iter(text.splitlines(True))
    me.step()

  def step(me):
    """
    Advance the ParseState to the next line.

    Sets `curr' to the next line, or to None if the input is exhausted.  The
    line number is only advanced if there was another line to read.
    """
    ## Take one item using a `for' loop rather than by calling the
    ## iterator's `next' method: that method is spelt differently in Python
    ## 2 and Python 3, and the `next' builtin is missing from Python 2.5.
    for line in me._it:
      me.curr = line
      me._i += 1
      return
    me.curr = None

  def error(me, msg):
    """
    Report a fatal error during parsing, attributing it to the current line.
    """
    die('%s:%d: %s' % (me._file, me._i, msg))
| 473 | |
class token (object):
  """
  A marker object: the only thing that matters about a token is which one
  it is, so tokens are compared by identity.
  """

  def __init__(me, name):
    """Set up a token called NAME; the name is only used by `repr'."""
    me._name = name

  def __repr__(me):
    """Describe the token by its name, for diagnostic purposes."""
    label = me._name
    return '#<%s>' % label

## Distinguished tokens which the parsing functions return in place of a
## template: `EOF' when the input has run out, and `END' for an `%end'
## directive.
EOF = token('eof')
END = token('end')
| 489 | |
## Regular expressions matching substitution tags.  Both are compiled in
## verbose mode, so the spaces within the patterns are not significant.
##
##   * R_SIMPLETAG matches `@COLUMN'; group 1 is the column name.
##
##   * R_COMPLEXTAG matches `@{COLUMN}' and `@{COLUMN:OP:...}'; group 1 is
##     the column name, and group 2 is the (possibly empty) run of `:OP'
##     suffixes, each still carrying its leading colon.
R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
| 493 | |
## A dictionary mapping operation names to functions which implement them.
OPMAP = {}

def defop(func):
  """
  Decorator for substitution operator functions.

  Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
  name, removing a prefix `op_' if there is one.  FUNC itself is returned
  unchanged, so it remains callable under its own name too.

  An operator function is given the raw value as an argument and should
  return the transformed value.
  """
  ## Use `__name__' rather than `func_name': the latter exists only on
  ## Python 2 function objects, whereas every named callable has the former.
  name = func.__name__
  if name.startswith('op_'): name = name[3:]
  OPMAP[name] = func
  return func
| 511 | |
@defop
def op_u(val):
  """@{COLUMN:u} -- the item converted to upper case."""
  return val.upper()
| 516 | |
@defop
def op_l(val):
  """@{COLUMN:l} -- the item converted to lower case."""
  return val.lower()
| 521 | |
@defop
def op_f(val):
  """@{COLUMN:f} -- the item, with each `/' character turned into `-'."""
  pieces = val.split('/')
  return '-'.join(pieces)
| 526 | |
## Matches a maximal run of characters other than ASCII letters, digits and
## underscores.
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')

@defop
def op_c(val):
  """
  @{COLUMN:c} -- the item, with each run of characters other than ASCII
  letters, digits and `_' replaced by a single `_'.
  """
  return RX.sub(R_NOTIDENT, '_', val)
| 534 | |
def _pairify(val):
  """
  Break VAL in two at its first `=' sign.

  Given `THIS=THAT', return the pair (THIS, THAT); only the first `=' counts,
  so THAT may contain further `=' signs.  If there is no `=' at all then
  return (VAL, VAL).
  """
  left, eq, right = val.partition('=')
  if not eq: return val, val
  return left, right
| 545 | |
@defop
def op_left(val):
  """
  @{COLUMN:left} -- the left-hand side of the item: the part before its
  first `=', or the whole item if it has no `='.
  """
  return _pairify(val)[0]
@defop
def op_right(val):
  """
  @{COLUMN:right} -- the right-hand side of the item: the part after its
  first `=', or the whole item if it has no `='.
  """
  return _pairify(val)[1]
| 554 | |
def parse_text(ps):
  """
  Parse a run of ordinary template text from the ParseState PS.

  Lines are consumed until the input runs out or a directive line is found;
  the directive is left as the current line for our caller to deal with.
  The result is a template which expands to the text with its tags filled
  in.

  Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'; operators are
  applied from left to right.  A literal `@' is written `@@'.  Lines
  beginning `%#' are comments and are dropped.  Lines beginning `%%' have
  the initial `%' removed and are otherwise treated as normal text (and, in
  particular, may contain tags).  Any other line beginning with `%' is a
  directive.
  """

  ## The templates collected so far, and the pieces of literal text seen
  ## since the last tag.
  tt = []
  pending = []

  def spill():
    ## Turn the pending literal text, if there is any, into a
    ## LiteralTemplate, and start collecting afresh.
    text = ''.join(pending)
    if text: tt.append(LiteralTemplate(text))
    del pending[:]

  def compose(outer, inner):
    ## Return the function which applies INNER and then OUTER.
    return lambda x: outer(inner(x))

  while True:
    line = ps.curr

    ## Decide what kind of line this is.  We're done at end-of-file or when
    ## we find a directive.
    if line is None: break
    elif line.startswith('%'):
      if line.startswith('%#'): ps.step(); continue
      elif line.startswith('%%'): line = line[1:]
      else: break

    ## Scan along the line from one `@' sign to the next.
    pos = 0
    while True:
      at = line.find('@', pos)
      if at < 0: break

      ## Everything up to the `@' is literal text.
      pending.append(line[pos:at])

      ## A doubled `@' stands for a single literal one.
      if line.startswith('@@', at):
        pending.append('@')
        pos = at + 2
        continue

      ## Otherwise this must be a tag.  Pick out the column name and find
      ## out where it lives.
      m = R_SIMPLETAG.match(line, at) or R_COMPLEXTAG.match(line, at)
      if not m: ps.error('invalid tag')
      col = m.group(1)
      try: rel, colidx = COLMAP[col]
      except KeyError: ps.error("unknown column `%s'" % col)
      ops = m.lastindex >= 2 and m.group(2)

      ## Build a single function out of the operators, if there are any.
      ## The operator string has a leading colon which needs trimming.
      wholeop = None
      if ops:
        for opname in ops[1:].split(':'):
          try: op = OPMAP[opname]
          except KeyError: ps.error("unknown operation `%s'" % opname)
          if wholeop is None: wholeop = op
          else: wholeop = compose(op, wholeop)

      ## The literal text so far comes first, and then the tag itself.
      spill()
      tt.append(TagTemplate(rel, colidx, wholeop))

      ## Carry on scanning just past the tag.
      pos = m.end()

    ## The rest of the line is literal text.  On to the next line.
    pending.append(line[pos:])
    ps.step()

  ## Nothing more to read.  Deal with any remaining literal text and glue
  ## the pieces together.
  spill()
  return SequenceTemplate(tt)
| 647 | |
## A list of (REGEX, FUNC) pairs, associating compiled regular expressions
## with directive-processing functions, in order of definition.
DIRECT = []

def direct(rx):
  """
  Return a decorator which registers a function as a directive handler.

  RX is a regular expression, as a string; it is compiled in verbose mode
  and appended to `DIRECT' along with the decorated function, which is
  returned unchanged.  Directive functions are invoked as FUNC(PS, M), where
  PS is the ParseState, and M is the match object resulting from matching
  RX against the directive text.
  """
  def register(func):
    entry = (RX.compile(rx, RX.VERBOSE), func)
    DIRECT.append(entry)
    return func
  return register
| 664 | |
def parse_template(ps):
  """
  Parse one template from the ParseState PS.

  One template is either a run of text (see `parse_text') or a directive,
  which is handed to the first function in `DIRECT' whose regular expression
  matches it.

  Returns a template object or a special token: `EOF' if the input has run
  out, or whatever the directive handler returned, which may be some other
  token.  An unrecognized directive is a fatal error.
  """

  ## Discard leading comments.  If we let `parse_text' see them, it might
  ## hand back an empty SequenceTemplate.
  while ps.curr is not None and ps.curr.startswith('%#'): ps.step()

  ## Deal with the easy cases: end of input, and ordinary text -- which
  ## includes lines beginning `%%'.
  line = ps.curr
  if line is None: return EOF
  if not line.startswith('%') or line.startswith('%%'):
    return parse_text(ps)

  ## So this is a directive.  Strip off the `%' and surrounding whitespace,
  ## and offer it to each handler in turn.  The directive line is consumed
  ## before the handler gets control.
  body = line[1:].strip()
  for rx, func in DIRECT:
    m = rx.match(body)
    if m:
      ps.step()
      return func(ps, m)
  ps.error("unrecognized directive")
| 695 | |
def parse_templseq(ps, nestp):
  """
  Parse a sequence of templates from the ParseState PS.

  Calls `parse_template' repeatedly and collects the results.  If NESTP is
  true then we're inside some enclosing directive: an `END' token finishes
  the sequence, and running out of input is a fatal error.  If NESTP is
  false then it's the other way around: end of input finishes the sequence,
  and `END' is a fatal error.

  Returns a template object combining the templates which were found.
  """
  tt = []
  while True:
    t = parse_template(ps)
    if t is END:
      if not nestp: ps.error("unexpected `end' directive")
      break
    if t is EOF:
      if nestp: ps.error("unexpected end of file")
      break
    tt.append(t)
  return SequenceTemplate(tt)
| 718 | |
@direct(r'repeat')
def dir_repeat(ps, m):
  """
  %repeat
  BODY
  %end

  Expand the BODY once for each element of the cartesian product of the
  relations mentioned within it.
  """
  body = parse_templseq(ps, True)
  return RepeatTemplate(body)
| 730 | |
@direct(r'end')
def dir_end(ps, m):
  """%end -- an end marker used to delimit chunks of template."""
  return END
| 735 | |
def compile_template(file, text):
  """
  Parse the string TEXT as a complete template and return the result.

  FILE is the name to quote in error messages.  This is the outermost level
  of parsing, so a stray `%end' directive is an error.
  """
  return parse_templseq(ParseState(file, text), False)
| 743 | |
| 744 | ###-------------------------------------------------------------------------- |
| 745 | ### Main code. |
| 746 | |
## Build the command-line parser.
op = OP.OptionParser(
  description = 'Generates files by filling in simple templates',
  usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
  version = 'Catacomb version @VERSION@')

def cb_gen(opt, optstr, arg, op):
  ## Option callback for `-g': select generate mode, and remember the
  ## option's argument as the name of the template file.
  op.values.mode = 'gen'
  op.values.input = arg

op.add_option('-l', '--list',
              action = 'store_const', const = 'list', dest = 'mode',
              help = 'list filenames generated')
op.add_option('-g', '--generate',
              action = 'callback', metavar = 'TEMPLATE',
              callback = cb_gen, type = 'string',
              help = 'generate file(s) from TEMPLATE file')

## If no mode option is given, we're left with a mode which nothing
## recognizes.
op.set_defaults(mode = 'what?')
opts, args = op.parse_args()
| 765 | |
## The first positional argument is the output filename pattern; any others
## describe relations.  Read the relations first: compiling the pattern
## looks up the column names which they define.
if not args: op.error('missing FILE')
filepat = args[0]
for rel in args[1:]:
  read_thing(rel)
filetempl = compile_template('<output>', filepat)
| 770 | |
def filenames(filetempl):
  """
  Generate the filenames described by the compiled filename template
  FILETEMPL.

  Yields a pair (NAME, CS) for each element of the cartesian product of the
  relations mentioned in the template: NAME is the expanded filename, and CS
  is a CursorSet positioned on the rows which produced it.  The same
  CursorSet is yielded every time and is advanced between items, so it is
  only good until the next item is requested.  If any of the relations is
  empty then nothing is generated at all.
  """
  cs = CursorSet()
  rels = filetempl.relations()
  if any(len(r) == 0 for r in rels): return
  cs.push(rels)
  more = True
  while more:
    buf = StringIO()
    filetempl.subst(buf, cs)
    yield buf.getvalue(), cs
    more = cs.step()
  cs.pop()
| 786 | |
## Main dispatch: do whatever the mode option asked for.
mode = opts.mode
if mode == 'gen':

  ## Compile the template file once.  It is wrapped in a RepeatTemplate so
  ## that relations which don't appear in the filename pattern are iterated
  ## over within each output file.
  with open(opts.input) as f:
    templ = RepeatTemplate(compile_template(opts.input, f.read()))

  ## Write each output to a temporary name, and rename it into place once
  ## it is complete.
  for name, cs in filenames(filetempl):
    tmp = name + '.new'
    with open(tmp, 'w') as out:
      templ.subst(out, cs)
    OS.rename(tmp, name)

elif mode == 'list':

  ## Just print the output filenames, one per line.
  for name, cs in filenames(filetempl): print(name)

else:
  die('What am I doing here?')
| 800 | |
| 801 | ###----- That's all, folks -------------------------------------------------- |