| 1 | #! @PYTHON@ |
| 2 | ### |
| 3 | ### Generate files by filling in simple templates |
| 4 | ### |
| 5 | ### (c) 2013 Straylight/Edgeware |
| 6 | ### |
| 7 | |
| 8 | ###----- Licensing notice --------------------------------------------------- |
| 9 | ### |
| 10 | ### This file is part of Catacomb. |
| 11 | ### |
| 12 | ### Catacomb is free software; you can redistribute it and/or modify |
| 13 | ### it under the terms of the GNU Library General Public License as |
| 14 | ### published by the Free Software Foundation; either version 2 of the |
| 15 | ### License, or (at your option) any later version. |
| 16 | ### |
| 17 | ### Catacomb is distributed in the hope that it will be useful, |
| 18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ### GNU Library General Public License for more details. |
| 21 | ### |
| 22 | ### You should have received a copy of the GNU Library General Public |
| 23 | ### License along with Catacomb; if not, write to the Free |
| 24 | ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| 25 | ### MA 02111-1307, USA. |
| 26 | |
| 27 | from __future__ import with_statement |
| 28 | |
| 29 | import itertools as IT |
| 30 | import optparse as OP |
| 31 | import os as OS |
| 32 | import re as RX |
| 33 | import sys as SYS |
| 34 | if SYS.version_info >= (3,): from io import StringIO |
| 35 | else: from cStringIO import StringIO |
| 36 | from sys import argv, exit, stderr |
| 37 | |
| 38 | ###-------------------------------------------------------------------------- |
| 39 | ### Utilities. |
| 40 | |
| 41 | QUIS = OS.path.basename(argv[0]) # Program name, for use in errors. |
| 42 | |
| 43 | def die(msg): |
| 44 | """Report MSG as a fatal error, and exit.""" |
| 45 | stderr.write('%s: %s\n' % (QUIS, msg)) |
| 46 | exit(1) |
| 47 | |
| 48 | def indexed(seq): |
| 49 | """ |
| 50 | Generate pairs (I, X), where I counts from zero and X are the items of SEQ. |
| 51 | """ |
| 52 | return IT.izip(IT.count(), seq) |
| 53 | |
| 54 | if SYS.version_info >= (3,): |
| 55 | def func_name(func): return func.__name__ |
| 56 | IT.izip = zip |
| 57 | else: |
| 58 | def func_name(func): return func.func_name |
| 59 | |
| 60 | try: next |
| 61 | except NameError: |
| 62 | def next(obj): return obj.next() |
| 63 | |
| 64 | ###-------------------------------------------------------------------------- |
| 65 | ### Reading the input values. |
| 66 | |
| 67 | ## Map column names to (Relation, # index) pairs. |
| 68 | COLMAP = {} |
| 69 | |
| 70 | class Cursor (object): |
| 71 | """ |
| 72 | A Cursor object keeps track of an iteration through a Relation. |
| 73 | |
| 74 | At any time, the Cursor has a `current' row; the individual cells of this |
| 75 | row may be retrieved using Python's standard indexing operator. The `step' |
| 76 | method advances to the next row (if there is one). The `reset' method |
| 77 | returns to row zero. |
| 78 | """ |
| 79 | |
| 80 | def __init__(me, rel): |
| 81 | """ |
| 82 | Initialize a new Cursor object, tracking its way through a Relation REL. |
| 83 | |
| 84 | The new Cursor has row zero as its current row. The REL must not be |
| 85 | empty. |
| 86 | """ |
| 87 | me._rel = rel |
| 88 | me.reset() |
| 89 | |
| 90 | def step(me): |
| 91 | """ |
| 92 | Advance the Cursor to the next row. |
| 93 | |
| 94 | Returns False if there is no next row; otherwise True. |
| 95 | """ |
| 96 | me._i += 1 |
| 97 | if me._i >= len(me._rel): |
| 98 | me._i = me._row = None |
| 99 | return False |
| 100 | me._row = me._rel[me._i] |
| 101 | return True |
| 102 | |
| 103 | def reset(me): |
| 104 | """ |
| 105 | Reset the Cursor, so that row zero is current again. |
| 106 | """ |
| 107 | me._i = 0 |
| 108 | me._row = me._rel[0] |
| 109 | |
| 110 | def __getitem__(me, i): |
| 111 | """ |
| 112 | Return the item in column I of the Cursor's current row. |
| 113 | |
| 114 | The index must be acceptable to the underlying row object, but otherwise |
| 115 | the Cursor imposes no restrictions. Indices need not be numeric, for |
| 116 | example. |
| 117 | """ |
| 118 | return me._row[i] |
| 119 | |
| 120 | def __repr__(me): |
| 121 | """ |
| 122 | Return a text description of the Cursor, for diagnostic use. |
| 123 | """ |
| 124 | return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row) |
| 125 | |
| 126 | class CursorSet (object): |
| 127 | """ |
| 128 | A CursorSet iterates over the cartiesian product of a number of Relations. |
| 129 | |
| 130 | More precisely: it maintains a stack, each level of which tracks a number |
| 131 | of Relations. More Relations can be pushed onto this stack with the `push' |
| 132 | method, and removed with `pop'. The `step' method advances through the |
| 133 | cartesian product of the Relations in the top level of the stack -- the |
| 134 | `active' Relations. Columns from the current rows of all of the currently |
| 135 | known Relations -- whether active or not -- can be extracted using `get'. |
| 136 | """ |
| 137 | |
| 138 | def __init__(me): |
| 139 | """ |
| 140 | Initialize a new CursorSet object. |
| 141 | |
| 142 | A new CursorSet has an empty stack. |
| 143 | """ |
| 144 | me._map = {} |
| 145 | me._stack = [] |
| 146 | me._act = None |
| 147 | |
| 148 | def push(me, rels): |
| 149 | """ |
| 150 | Push the new Relations RELS onto the stack and start iterating. |
| 151 | |
| 152 | The currently active Relations are pushed down. Those Relations which are |
| 153 | not already known to the CursorSet become the newly active collection. |
| 154 | (Relations which are already known are simply ignored.) |
| 155 | |
| 156 | Iteration traverses Relations on the right more rapidly. |
| 157 | """ |
| 158 | cc = [] |
| 159 | rr = [] |
| 160 | for r in rels: |
| 161 | if r in me._map: continue |
| 162 | c = me._map[r] = Cursor(r) |
| 163 | rr.append(r) |
| 164 | cc.append(c) |
| 165 | me._stack.append((me._act, rr)) |
| 166 | me._act = cc |
| 167 | |
| 168 | def step(me): |
| 169 | """ |
| 170 | Advance the CursorSet through the currently active Relations. |
| 171 | |
| 172 | Return False if the active Relations have now been exhausted; otherwise |
| 173 | return True. |
| 174 | """ |
| 175 | i = 0 |
| 176 | while i < len(me._act): |
| 177 | if me._act[i].step(): return True |
| 178 | if i >= len(me._act): return False |
| 179 | me._act[i].reset() |
| 180 | i += 1 |
| 181 | return False |
| 182 | |
| 183 | def pop(me): |
| 184 | """ |
| 185 | Pop the active Relations. |
| 186 | |
| 187 | Return to iterating over the previously active collection. |
| 188 | """ |
| 189 | me._act, rels = me._stack.pop() |
| 190 | for r in rels: del me._map[r] |
| 191 | |
| 192 | def get(me, rel, i): |
| 193 | """ |
| 194 | Return the item with index I in the current row of Relation REL. |
| 195 | """ |
| 196 | return me._map[rel][i] |
| 197 | |
| 198 | class Relation (object): |
| 199 | """ |
| 200 | A Relation keeps track of a table of data. |
| 201 | |
| 202 | A Relation consists of a `header', which is a sequence of string names, |
| 203 | and a rectangular array of data, each row of which has the same number of |
| 204 | items as the header. |
| 205 | |
| 206 | Relations can be iterated over using Cursors and CursorSets. |
| 207 | """ |
| 208 | |
| 209 | def __init__(me, head): |
| 210 | """ |
| 211 | Initialize a new, empty Relation with header HEAD. |
| 212 | |
| 213 | The `COLMAP' dictionary is updated to map the names in the header to this |
| 214 | Relation and its column indices. |
| 215 | """ |
| 216 | me._head = head |
| 217 | me._rows = [] |
| 218 | for i, c in indexed(head): COLMAP[c] = me, i |
| 219 | |
| 220 | def addrow(me, row): |
| 221 | """ |
| 222 | Add a ROW to the Relation. |
| 223 | |
| 224 | The new row must have the correct number of entries. |
| 225 | """ |
| 226 | if len(row) != len(me._head): |
| 227 | die("mismatch: row `%s' doesn't match heading `%s'" % |
| 228 | (', '.join(row), ', '.join(me._head))) |
| 229 | me._rows.append(row) |
| 230 | |
| 231 | def __len__(me): |
| 232 | """Return the number of rows in the Relation.""" |
| 233 | return len(me._rows) |
| 234 | |
| 235 | def __getitem__(me, i): |
| 236 | """Return the Ith row of the Relation.""" |
| 237 | return me._rows[i] |
| 238 | |
| 239 | def __repr__(me): |
| 240 | """Return a textual description of the Relation, for diagnostic use.""" |
| 241 | return '#<Relation %r>' % me._head |
| 242 | |
| 243 | def read_immediate(word): |
| 244 | """ |
| 245 | Return a Relation constructed by parsing WORD. |
| 246 | |
| 247 | The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are |
| 248 | comma-separated lists of strings which will form the relation's header and |
| 249 | rows respectively. There is no way to include an item which contains a |
| 250 | comma or whitespace. |
| 251 | """ |
| 252 | head, rels = word.split('=', 1) |
| 253 | rel = Relation([c.strip() for c in head.split(',')]) |
| 254 | for row in rels.split(): rel.addrow([c.strip() for c in row.split(',')]) |
| 255 | |
| 256 | def read_file(spec): |
| 257 | """ |
| 258 | Return a Relation constructed from a file, according to SPEC. |
| 259 | |
| 260 | The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a |
| 261 | comma-separated list of strings to form the relation's header. Each line |
| 262 | from the file which is neither empty nor begins with `#' is split into |
| 263 | whitespace-separated words to form a row in the relation. There is no way |
| 264 | to include an item which contains whitespace. |
| 265 | """ |
| 266 | file, head = spec.split(':', 1) |
| 267 | rel = Relation([c.strip() for c in head.split(',')]) |
| 268 | with open(file) as f: |
| 269 | for line in f: |
| 270 | line = line.strip() |
| 271 | if line.startswith('#') or line == '': continue |
| 272 | rel.addrow(line.split()) |
| 273 | |
| 274 | def read_thing(spec): |
| 275 | """ |
| 276 | Return a relation constructed from SPEC. |
| 277 | |
| 278 | If SPEC begins with `@' then read the relation from a file (see |
| 279 | `read_file'); otherwise interpret it as immediate data (see |
| 280 | `read_immediate'). |
| 281 | """ |
| 282 | if spec.startswith('@'): read_file(spec[1:]) |
| 283 | else: read_immediate(spec) |
| 284 | |
| 285 | ###-------------------------------------------------------------------------- |
| 286 | ### Template structure. |
| 287 | |
| 288 | class BasicTemplate (object): |
| 289 | """ |
| 290 | Base class for template objects. |
| 291 | |
| 292 | The protocol for templates consists of two methods: |
| 293 | |
| 294 | relations() Return a set of Relations mentioned at top-level in |
| 295 | substitutions in the template. |
| 296 | |
| 297 | subst(OUT, CS) Fill in the template, writing the output to the |
| 298 | stream OUT. The CS is a CursorSet object tracking |
| 299 | the current iteration state. |
| 300 | """ |
| 301 | pass |
| 302 | |
| 303 | class LiteralTemplate (BasicTemplate): |
| 304 | """ |
| 305 | A LiteralTemplate outputs a fixed string. |
| 306 | """ |
| 307 | |
| 308 | def __init__(me, text, **kw): |
| 309 | """ |
| 310 | Initialize a new LiteralTemplate object. TEXT is the text to be written. |
| 311 | """ |
| 312 | super(LiteralTemplate, me).__init__(**kw) |
| 313 | me._text = text |
| 314 | |
| 315 | def relations(me): |
| 316 | """A LiteralTemplate contains no substitutions.""" |
| 317 | return set() |
| 318 | |
| 319 | def subst(me, out, cs): |
| 320 | """A LiteralTemplate just emits its text.""" |
| 321 | out.write(me._text) |
| 322 | |
| 323 | def __repr__(me): |
| 324 | return '#<LiteralTemplate %r>' % me._text |
| 325 | |
| 326 | class TagTemplate (BasicTemplate): |
| 327 | """ |
| 328 | A TagTemplate object expands a substitution tag. |
| 329 | |
| 330 | It extracts an item from the current row of a relation, processes it |
| 331 | according to an operation, and outputs the result. |
| 332 | """ |
| 333 | |
| 334 | def __init__(me, rel, i, op, **kw): |
| 335 | """ |
| 336 | Initialize a new TagTemplate object. |
| 337 | |
| 338 | REL is the relation from which to pick the output; I is the column index; |
| 339 | OP is a transformation to apply to the data, and may be None to indicate |
| 340 | that the data should not be transformed. |
| 341 | """ |
| 342 | super(TagTemplate, me).__init__(**kw) |
| 343 | me._rel = rel |
| 344 | me._i = i |
| 345 | me._op = op |
| 346 | |
| 347 | def relations(me): |
| 348 | """The TagTemplate knows which relation it uses.""" |
| 349 | return set([me._rel]) |
| 350 | |
| 351 | def subst(me, out, cs): |
| 352 | """ |
| 353 | A TagTemplate extracts and transforms an item from the current row of |
| 354 | a relation. |
| 355 | """ |
| 356 | val = cs.get(me._rel, me._i) |
| 357 | if me._op is not None: val = me._op(val) |
| 358 | out.write(val) |
| 359 | |
| 360 | def __repr__(me): |
| 361 | return '#<TagTemplate %s>' % me._rel._head[me._i] |
| 362 | |
| 363 | class SequenceTemplate (BasicTemplate): |
| 364 | """ |
| 365 | A SequenceTemplate concatenates a number of other templates. |
| 366 | """ |
| 367 | |
| 368 | def __new__(cls, seq, **kw): |
| 369 | """ |
| 370 | Construct a template from a sequence SEQ of other templates. |
| 371 | |
| 372 | If SEQ is a singleton (which it often is) then return it directly; |
| 373 | otherwise construct a SequenceTemplate. |
| 374 | """ |
| 375 | if len(seq) == 1: |
| 376 | return seq[0] |
| 377 | else: |
| 378 | return super(SequenceTemplate, cls).__new__(cls, **kw) |
| 379 | |
| 380 | def __init__(me, seq, **kw): |
| 381 | """ |
| 382 | Initialize a new SequenceTemplate object from SEQ. |
| 383 | |
| 384 | The sequence is flattened out: if SEQ contains SequenceTemplates then we |
| 385 | use their children directly, so that we don't have a useless tree. |
| 386 | """ |
| 387 | super(SequenceTemplate, me).__init__(**kw) |
| 388 | tt = [] |
| 389 | cls = type(me) |
| 390 | for t in seq: |
| 391 | if isinstance(t, cls): tt += t._seq |
| 392 | else: tt.append(t) |
| 393 | me._seq = tt |
| 394 | |
| 395 | def relations(me): |
| 396 | """ |
| 397 | The relations of a SequenceTemplate are the union of the relations of its |
| 398 | children. |
| 399 | """ |
| 400 | rr = set() |
| 401 | for t in me._seq: rr.update(t.relations()) |
| 402 | return rr |
| 403 | |
| 404 | def subst(me, out, cs): |
| 405 | """ |
| 406 | The output of a SequenceTemplate is the concatenation of the expansions |
| 407 | of its children. |
| 408 | """ |
| 409 | for t in me._seq: t.subst(out, cs) |
| 410 | |
| 411 | def __repr__(me): |
| 412 | return '#<SequenceTemplate %r>' % me._seq |
| 413 | |
| 414 | class RepeatTemplate (BasicTemplate): |
| 415 | """ |
| 416 | A RepeatTemplate iterates its body over a number of relations. |
| 417 | """ |
| 418 | |
| 419 | def __init__(me, sub): |
| 420 | """ |
| 421 | Initialize a new RepeatTemplate, given a template to act as its body. |
| 422 | """ |
| 423 | me._sub = sub |
| 424 | |
| 425 | def relations(me): |
| 426 | """ |
| 427 | A RepeatTemplate hides the relations of its body. |
| 428 | """ |
| 429 | return set() |
| 430 | |
| 431 | def subst(me, out, cs): |
| 432 | """ |
| 433 | Substitute a RepeatTemplate, by iterating over the relations mentioned in |
| 434 | its body template. |
| 435 | """ |
| 436 | rr = me._sub.relations() |
| 437 | for r in rr: |
| 438 | if len(r) == 0: return |
| 439 | cs.push(rr) |
| 440 | while True: |
| 441 | me._sub.subst(out, cs) |
| 442 | if not cs.step(): break |
| 443 | cs.pop() |
| 444 | |
| 445 | def __repr__(me): |
| 446 | return '#<RepeatTemplate %r>' % me._sub |
| 447 | |
| 448 | ###-------------------------------------------------------------------------- |
| 449 | ### Some slightly cheesy parsing machinery. |
| 450 | |
| 451 | class ParseState (object): |
| 452 | """ |
| 453 | A ParseState object keeps track of a parser's position in a file. |
| 454 | |
| 455 | The `curr' slot contains the current line under consideration. |
| 456 | """ |
| 457 | |
| 458 | def __init__(me, file, text): |
| 459 | """ |
| 460 | Initialize a ParseState object. |
| 461 | |
| 462 | The FILE is a string naming the source file, and the TEXT is an iterator |
| 463 | over the file's lines. |
| 464 | """ |
| 465 | me._file = file |
| 466 | me._i = 0 |
| 467 | me._it = iter(text.splitlines(True)) |
| 468 | me.step() |
| 469 | |
| 470 | def step(me): |
| 471 | """ |
| 472 | Advance the ParseState to the next line. |
| 473 | |
| 474 | Sets `curr' to the next line, or to None if the input is exhausted. |
| 475 | """ |
| 476 | try: me.curr = next(me._it) |
| 477 | except StopIteration: me.curr = None |
| 478 | else: me._i += 1 |
| 479 | |
| 480 | def error(me, msg): |
| 481 | """ |
| 482 | Report a fatal error during parsing, attributing it to the current line. |
| 483 | """ |
| 484 | die('%s:%d: %s' % (me._file, me._i, msg)) |
| 485 | |
| 486 | class token (object): |
| 487 | """ |
| 488 | A token object has no interesting properties other than its identity. |
| 489 | """ |
| 490 | |
| 491 | def __init__(me, name): |
| 492 | """Initialize a new token, with the given NAME.""" |
| 493 | me._name = name |
| 494 | def __repr__(me): |
| 495 | """Return a description of the token, for diagnostic purposes.""" |
| 496 | return '#<%s>' % me._name |
| 497 | |
| 498 | ## Some magical tokens useful during parsing. |
| 499 | EOF = token('eof') |
| 500 | END = token('end') |
| 501 | |
| 502 | ## Regular expressions matching substitution tags. |
| 503 | R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE) |
| 504 | R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE) |
| 505 | |
| 506 | ## A dictionary mapping operation names to functions which implement them. |
| 507 | OPMAP = {} |
| 508 | |
| 509 | def defop(func): |
| 510 | """ |
| 511 | Decorator for substitution operator functions. |
| 512 | |
| 513 | Remember the operator in `OPMAP'; the operator's name is taken from FUNC's |
| 514 | name, removing a prefix `op_' if there is one. |
| 515 | |
| 516 | An operator function is given the raw value as an argument and should |
| 517 | return the transformed value. |
| 518 | """ |
| 519 | name = func_name(func) |
| 520 | if name.startswith('op_'): name = name[3:] |
| 521 | OPMAP[name] = func |
| 522 | return func |
| 523 | |
| 524 | @defop |
| 525 | def op_u(val): |
| 526 | """@{COLUMN:u} -- the item in upper case.""" |
| 527 | return val.upper() |
| 528 | |
| 529 | @defop |
| 530 | def op_l(val): |
| 531 | """@{COLUMN:l} -- the item in upper case.""" |
| 532 | return val.lower() |
| 533 | |
| 534 | @defop |
| 535 | def op_f(val): |
| 536 | """@{COLUMN:f} -- the item, with `/' characters replaced by `-'.""" |
| 537 | return val.replace('/', '-') |
| 538 | |
| 539 | R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+') |
| 540 | @defop |
| 541 | def op_c(val): |
| 542 | """ |
| 543 | @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'. |
| 544 | """ |
| 545 | return R_NOTIDENT.sub('_', val) |
| 546 | |
| 547 | def _pairify(val): |
| 548 | """ |
| 549 | Split VAL into two, at an `=' sign. |
| 550 | |
| 551 | If VAL has the form `THIS=THAT' then return the pair (THIS, THAT); |
| 552 | otherwise return (VAL, VAL). |
| 553 | """ |
| 554 | c = val.find('=') |
| 555 | if c >= 0: return val[:c], val[c + 1:] |
| 556 | else: return val, val |
| 557 | |
| 558 | @defop |
| 559 | def op_left(val): |
| 560 | """@{COLUMN:left} -- the left-hand side of the item.""" |
| 561 | return _pairify(val)[0] |
| 562 | @defop |
| 563 | def op_right(val): |
| 564 | """@{COLUMN:right} -- the left-hand side of the item.""" |
| 565 | return _pairify(val)[1] |
| 566 | |
| 567 | def parse_text(ps): |
| 568 | """ |
| 569 | Parse a chunk of text from a ParseState. |
| 570 | |
| 571 | Stop when we get to something which looks like a template keyword, but |
| 572 | extract tags. Return the resulting template. |
| 573 | |
| 574 | Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'. The text may |
| 575 | contain comments beginning `%#', which are ignored, and lines beginning |
| 576 | `%%' which have the initial `%' removed and are otherwise treated as normal |
| 577 | text (and, in particular, may contain tags). Other lines beginning with |
| 578 | `%' are directives and must be processed by our caller. |
| 579 | """ |
| 580 | |
| 581 | ## Starting out: no templates collected, and an empty buffer of literal |
| 582 | ## text. |
| 583 | tt = [] |
| 584 | lit = StringIO() |
| 585 | |
| 586 | def spill(): |
| 587 | ## Spill accumulated literal text from `lit' into a LiteralTemplate |
| 588 | ## object. |
| 589 | l = lit.getvalue() |
| 590 | if l: tt.append(LiteralTemplate(l)) |
| 591 | lit.seek(0) |
| 592 | lit.truncate() |
| 593 | |
| 594 | ## Iterate over the lines of input. |
| 595 | while True: |
| 596 | line = ps.curr |
| 597 | |
| 598 | ## Stop if there's no more text; handle lines beginning with `%'. |
| 599 | if line is None: break |
| 600 | elif line.startswith('%'): |
| 601 | if line.startswith('%#'): ps.step(); continue |
| 602 | elif line.startswith('%%'): line = line[1:] |
| 603 | else: break |
| 604 | |
| 605 | ## Work through the line, finding tags. |
| 606 | i = 0 |
| 607 | while True: |
| 608 | |
| 609 | ## If there are no more `@' signs, there can be no more tags, and we're |
| 610 | ## done. |
| 611 | j = line.find('@', i) |
| 612 | if j < 0: break |
| 613 | |
| 614 | ## Write the chunk we've found. |
| 615 | lit.write(line[i:j]) |
| 616 | |
| 617 | ## If the next character is also `@' then this is an escape and we |
| 618 | ## should carry on. |
| 619 | if line[j:].startswith('@@'): |
| 620 | lit.write('@') |
| 621 | i = j + 2 |
| 622 | continue |
| 623 | |
| 624 | ## Parse the tag into a column name, and maybe some operators. |
| 625 | m = R_SIMPLETAG.match(line, j) |
| 626 | if not m: m = R_COMPLEXTAG.match(line, j) |
| 627 | if not m: ps.error('invalid tag') |
| 628 | col = m.group(1) |
| 629 | try: rel, i = COLMAP[col] |
| 630 | except KeyError: ps.error("unknown column `%s'" % col) |
| 631 | ops = m.lastindex >= 2 and m.group(2) |
| 632 | |
| 633 | ## If we have operators then look them up and compose them. |
| 634 | wholeop = None |
| 635 | if ops: |
| 636 | for opname in ops[1:].split(':'): |
| 637 | try: op = OPMAP[opname] |
| 638 | except KeyError: ps.error("unknown operation `%s'" % opname) |
| 639 | if wholeop is None: wholeop = op |
| 640 | else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop) |
| 641 | |
| 642 | ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate |
| 643 | ## for the tag. |
| 644 | spill() |
| 645 | tt.append(TagTemplate(rel, i, wholeop)) |
| 646 | |
| 647 | ## Continue from after the tag. |
| 648 | i = m.end() |
| 649 | |
| 650 | ## Finished a line. Write out the remainder of the line and move onto |
| 651 | ## the next. |
| 652 | lit.write(line[i:]) |
| 653 | ps.step() |
| 654 | |
| 655 | ## Run out of things to do. Flush out the rest of the literal text and |
| 656 | ## combine the templates. |
| 657 | spill() |
| 658 | return SequenceTemplate(tt) |
| 659 | |
| 660 | ## A dictionary mapping regular expressions to directive-processing functions. |
| 661 | DIRECT = [] |
| 662 | |
| 663 | def direct(rx): |
| 664 | """ |
| 665 | Function decorator for template file directives. |
| 666 | |
| 667 | Associate the regular expression RX with the function in `DIRECT'. |
| 668 | Directive functions are invoked as FUNC(PS, M), where PS is the ParseState, |
| 669 | and M is the match object resulting from matching RX against the directive |
| 670 | text. |
| 671 | """ |
| 672 | def _(func): |
| 673 | DIRECT.append((RX.compile(rx, RX.VERBOSE), func)) |
| 674 | return func |
| 675 | return _ |
| 676 | |
| 677 | def parse_template(ps): |
| 678 | """ |
| 679 | Parse a single template from the ParseState PS. |
| 680 | |
| 681 | A single template is either a chunk of text (parsed by `parse_text') or a |
| 682 | directive (handled by the appropriate function in `DIRECT'). |
| 683 | |
| 684 | Returns either a template object, or a special token. In particular, `EOF' |
| 685 | is returned if we run out of text; directives may return other tokens. |
| 686 | """ |
| 687 | |
| 688 | ## Skip initial comments. Otherwise we might end up with an empty |
| 689 | ## SequenceTemplate here. |
| 690 | while ps.curr is not None and ps.curr.startswith('%#'): ps.step() |
| 691 | |
| 692 | ## If we've run out of input, return `EOF' here. A line beginning `%%', or |
| 693 | ## not beginning `%', means we've found a chunk of text. Otherwise find |
| 694 | ## the right directive handler. |
| 695 | if ps.curr is None: return EOF |
| 696 | elif ps.curr.startswith('%'): |
| 697 | if ps.curr.startswith('%%'): return parse_text(ps) |
| 698 | for rx, func in DIRECT: |
| 699 | line = ps.curr[1:].strip() |
| 700 | m = rx.match(line) |
| 701 | if m: |
| 702 | ps.step() |
| 703 | return func(ps, m) |
| 704 | ps.error("unrecognized directive") |
| 705 | else: |
| 706 | return parse_text(ps) |
| 707 | |
| 708 | def parse_templseq(ps, nestp): |
| 709 | """ |
| 710 | Parse a sequence of templates from the ParseState PS. |
| 711 | |
| 712 | Calls `parse_template' repeatedly If NESTP is true, then an `END' token |
| 713 | (presumably from a directive handler) is permitted and halts parsing; |
| 714 | otherwise `END' signifies an error. |
| 715 | |
| 716 | Returns a template object. |
| 717 | """ |
| 718 | |
| 719 | tt = [] |
| 720 | while True: |
| 721 | t = parse_template(ps) |
| 722 | if t is END: |
| 723 | if nestp: break |
| 724 | else: ps.error("unexpected `end' directive") |
| 725 | elif t is EOF: |
| 726 | if nestp: ps.error("unexpected end of file") |
| 727 | else: break |
| 728 | tt.append(t) |
| 729 | return SequenceTemplate(tt) |
| 730 | |
| 731 | @direct(r'repeat') |
| 732 | def dir_repeat(ps, m): |
| 733 | """ |
| 734 | %repeat |
| 735 | BODY |
| 736 | %end |
| 737 | |
| 738 | Iterate the body over the cartesian product of the relations mentioned |
| 739 | within. |
| 740 | """ |
| 741 | return RepeatTemplate(parse_templseq(ps, True)) |
| 742 | |
| 743 | @direct(r'end') |
| 744 | def dir_end(ps, m): |
| 745 | """%end -- an end marker used to delimet chunks of template.""" |
| 746 | return END |
| 747 | |
| 748 | def compile_template(file, text): |
| 749 | """ |
| 750 | Compile TEXT into a template, attributing errors to FILE. |
| 751 | """ |
| 752 | ps = ParseState(file, text) |
| 753 | t = parse_templseq(ps, False) |
| 754 | return t |
| 755 | |
| 756 | ###-------------------------------------------------------------------------- |
| 757 | ### Main code. |
| 758 | |
| 759 | op = OP.OptionParser( |
| 760 | description = 'Generates files by filling in simple templates', |
| 761 | usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...', |
| 762 | version = 'Catacomb version @VERSION@') |
| 763 | def cb_gen(opt, optstr, arg, op): |
| 764 | op.values.input = arg |
| 765 | op.values.mode = 'gen' |
| 766 | for short, long, kw in [ |
| 767 | ('-l', '--list', dict( |
| 768 | action = 'store_const', const = 'list', dest = 'mode', |
| 769 | help = 'list filenames generated')), |
| 770 | ('-g', '--generate', dict( |
| 771 | action = 'callback', metavar = 'TEMPLATE', |
| 772 | callback = cb_gen, type = 'string', |
| 773 | help = 'generate file(s) from TEMPLATE file'))]: |
| 774 | op.add_option(short, long, **kw) |
| 775 | op.set_defaults(mode = 'what?') |
| 776 | opts, args = op.parse_args() |
| 777 | |
| 778 | if len(args) < 1: op.error('missing FILE') |
| 779 | filepat = args[0] |
| 780 | for rel in args[1:]: read_thing(rel) |
| 781 | filetempl = compile_template('<output>', filepat) |
| 782 | |
| 783 | def filenames(filetempl): |
| 784 | """ |
| 785 | Generate the filenames in the compiled filename template FILETEMPL. |
| 786 | """ |
| 787 | cs = CursorSet() |
| 788 | rr = filetempl.relations() |
| 789 | for r in rr: |
| 790 | if not len(r): return |
| 791 | cs.push(rr) |
| 792 | while True: |
| 793 | out = StringIO() |
| 794 | filetempl.subst(out, cs) |
| 795 | yield out.getvalue(), cs |
| 796 | if not cs.step(): break |
| 797 | cs.pop() |
| 798 | |
| 799 | ## Main dispatch. |
| 800 | if opts.mode == 'list': |
| 801 | for file, cs in filenames(filetempl): print(file) |
| 802 | elif opts.mode == 'gen': |
| 803 | with open(opts.input) as f: |
| 804 | templ = RepeatTemplate(compile_template(opts.input, f.read())) |
| 805 | for file, cs in filenames(filetempl): |
| 806 | new = file + '.new' |
| 807 | with open(new, 'w') as out: |
| 808 | templ.subst(out, cs) |
| 809 | OS.rename(new, file) |
| 810 | else: |
| 811 | die('What am I doing here?') |
| 812 | |
| 813 | ###----- That's all, folks -------------------------------------------------- |