peerdb/tripe-newpeers.in: Split `prepare' in twain.
[tripe] / peerdb / tripe-newpeers.in
1 #! @PYTHON@
2 ### -*-python-*-
3 ###
4 ### Build a CDB file from configuration file
5 ###
6 ### (c) 2007 Straylight/Edgeware
7 ###
8
9 ###----- Licensing notice ---------------------------------------------------
10 ###
11 ### This file is part of Trivial IP Encryption (TrIPE).
12 ###
13 ### TrIPE is free software: you can redistribute it and/or modify it under
14 ### the terms of the GNU General Public License as published by the Free
15 ### Software Foundation; either version 3 of the License, or (at your
16 ### option) any later version.
17 ###
18 ### TrIPE is distributed in the hope that it will be useful, but WITHOUT
19 ### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20 ### FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 ### for more details.
22 ###
23 ### You should have received a copy of the GNU General Public License
24 ### along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
25
## The package version, as reported by `--version'.  The `@VERSION@' text is
## a placeholder, presumably substituted when this template is turned into
## the installed script -- confirm against the build system.
VERSION = '@VERSION@'
27
28 ###--------------------------------------------------------------------------
29 ### External dependencies.
30
31 import mLib as M
32 from optparse import OptionParser
33 import cdb as CDB
34 from sys import stdin, stdout, exit, argv
35 import re as RX
36 import os as OS
37 from cStringIO import StringIO
38
39 ###--------------------------------------------------------------------------
40 ### Utilities.
41
class CDBFake (object):
  """
  A stand-in for a CDB writer which emits plain text instead.

  It offers the same `add' and `finish' methods as cdbmake, but each record
  is simply written as a `KEY:VALUE' line, suitable for cdb-map.
  """

  def __init__(me, file = stdout):
    """Remember the output FILE; standard output is used by default."""
    me.file = file

  def add(me, key, value):
    """Write one record, mapping KEY to VALUE, as a line of text."""
    record = '%s:%s\n' % (key, value)
    me.file.write(record)

  def finish(me):
    """Finish the output.  Nothing needs doing for plain text."""
    return None
50
class ExpectedError (Exception):
  """
  Base class for anticipated failures.

  The main program catches these, prints the message and exits with status
  2, rather than letting them propagate.
  """
52
53 ###--------------------------------------------------------------------------
54 ### A bulk DNS resolver.
55
class ResolverFailure (ExpectedError):
  """Report that the host name HOST couldn't be resolved; MSG says why."""

  def __init__(me, host, msg):
    me.host, me.msg = host, msg

  def __str__(me):
    details = (me.host, me.msg)
    return "failed to resolve `%s': %s" % details
62
class ResolvingHost (object):
  """
  The state of one host name being looked up by a bulk resolver.

  An instance collects the addresses found for the name (or the reason why
  the lookup failed), and implements the flag handling behind the
  $FLAGS[HOSTNAME] syntax.
  """

  def __init__(me, name):
    """Set up an object for the host NAME, with no addresses as yet."""
    me.name = name
    me.failure = None
    me.addr = { 'INET': [] }

  def addaddr(me, af, addr):
    """
    Record ADDR as an address of this host in the address family AF.

    Only the `INET' family has a list to receive addresses; any other AF
    raises KeyError.
    """
    family = me.addr[af]
    family.append(addr)

  def failed(me, msg):
    """Note that resolving this host failed; MSG explains why, for humans."""
    me.failure = msg

  def get(me, flags):
    """
    Return the list of addresses selected by the FLAGS string.

    Each `4' in FLAGS selects the host's IPv4 addresses; if no family is
    selected at all then the IPv4 addresses are used.  A `*' asks for every
    selected address; without it only the first one is returned.

    Raises ResolverFailure if the lookup failed or nothing was selected, and
    ValueError if FLAGS contains an unknown character.
    """
    if me.failure is not None:
      raise ResolverFailure(me.name, me.failure)

    inet = me.addr['INET']
    chosen = []
    want_all = picked = False
    for flag in flags:
      if flag == '4':
        chosen += inet
        picked = True
      elif flag == '*':
        want_all = True
      else:
        raise ValueError("unknown address-resolution flag `%s'" % flag)

    ## No address family was requested explicitly, so take the default.
    if not picked: chosen = inet

    if not chosen:
      raise ResolverFailure(me.name, 'no matching addresses found')
    if want_all: return chosen
    return [chosen[0]]
105
class BulkResolver (object):
  """
  Resolve a number of DNS names in parallel.

  The BulkResolver is used in three phases:

    1. Call prepare(HOSTNAME) once for each of the host names you're
       interested in.

    2. Call run() to drive the lookups until they have all finished.

    3. Call lookup(HOSTNAME, FLAGS) to collect the addresses you wanted.
       This raises KeyError if HOSTNAME was never prepared, and
       ResolverFailure if the name couldn't be resolved.
  """

  def __init__(me):
    """Start with no host names registered and no lookups outstanding."""
    me._noutstand = 0
    me._namemap = {}

  def _prepare(me, host, name):
    """Start a lookup of NAME, whose outcome is to be recorded in HOST."""

    ## Both outcomes funnel into `_resolved'; failure is signalled by an
    ## empty address list.
    def found(cname, alias, addr): me._resolved(host, cname, addr)
    def notfound(): me._resolved(host, None, [])

    host._resolv = M.SelResolveByName(name, found, notfound)
    me._noutstand += 1

  def prepare(me, name):
    """Register the host NAME for resolution, unless it already has been."""
    if name in me._namemap: return
    host = ResolvingHost(name)
    me._namemap[name] = host
    me._prepare(host, name)

  def run(me):
    """Service the background lookups until none remain outstanding."""
    while me._noutstand != 0: M.select()

  def lookup(me, name, flags):
    """Return the addresses found for the host NAME, selected by FLAGS."""
    host = me._namemap[name]
    return host.get(flags)

  def _resolved(me, host, cname, addr):
    """
    Callback function: record the outcome of a lookup in HOST.

    ADDR is the list of addresses found; if it's empty then the lookup is
    recorded as having failed.  A non-None CNAME replaces the host's name.
    """
    if addr:
      if cname is not None: host.name = cname
      for a in addr: host.addaddr('INET', a)
    else:
      host.failed('(unknown failure)')

    ## Either way, this lookup is finished.
    host._resolv = None
    me._noutstand -= 1
158
159 ###--------------------------------------------------------------------------
160 ### The configuration parser.
161
162 ## Match a comment or empty line.
## Match a comment or empty line: optional leading whitespace followed either
## by the end of the line or by a `;' or `#' comment character.
RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])')

## Match a section group header `[NAME]'; group 1 is the NAME.
RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $')

## Match an assignment line `KEY = VALUE' or `KEY: VALUE'.  The KEY must
## begin in the first column.  Group 1 is the KEY and group 2 is the VALUE
## (possibly empty), both without surrounding whitespace.
RX_ASSGN = RX.compile(r'''(?x) ^
                      ([^\s:=] (?: [^:=]* [^\s:=])?)
                      \s* [:=] \s*
                      (| \S | \S.*\S)
                      \s* $''')

## Match a continuation line, i.e., one which begins with whitespace.  Group
## 1 is the continuation text (possibly empty), without surrounding
## whitespace.
RX_CONT = RX.compile(r'''(?x) ^ \s+
                     (| \S | \S.*\S)
                     \s* $''')

## Match a $(VAR) configuration variable reference; group 1 is the VAR.
RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)')

## Match a $FLAGS[HOST] name resolution reference; group 1 is the (possibly
## empty) string of flag characters `4' and `*'; group 2 is the HOST.
RX_RESOLVE = RX.compile(r'(?x) \$ ([4*]*) \[ ([^]]+) \]')
186
class ConfigSyntaxError (ExpectedError):
  """Report a syntax error (MSG) at line LNO of the input file FNAME."""

  def __init__(me, fname, lno, msg):
    me.fname, me.lno, me.msg = fname, lno, msg

  def __str__(me):
    where = (me.fname, me.lno, me.msg)
    return '%s:%d: %s' % where
194
195 def _fmt_path(path):
196 return ' -> '.join(["`%s'" % hop for hop in path])
197
class AmbiguousOptionError (ExpectedError):
  """
  Report that KEY is inherited with conflicting values.

  PATHA leads to the value VALA, while PATHB leads to the different value
  VALB.
  """

  def __init__(me, key, patha, vala, pathb, valb):
    me.key = key
    me.patha = patha; me.vala = vala
    me.pathb = pathb; me.valb = valb

  def __str__(me):
    template = "Ambiguous answer resolving key `%s': " \
               "path %s yields `%s' but %s yields `%s'"
    fields = (me.key,
              _fmt_path(me.patha), me.vala,
              _fmt_path(me.pathb), me.valb)
    return template % fields
207
class InheritanceCycleError (ExpectedError):
  """Report that looking up KEY led around the inheritance cycle PATH."""

  def __init__(me, key, path):
    me.key, me.path = key, path

  def __str__(me):
    cycle = _fmt_path(me.path)
    return "Found a cycle %s looking up key `%s'" % (cycle, me.key)
215
class MissingSectionException (ExpectedError):
  """Report that no section named SEC exists."""

  def __init__(me, sec):
    me.sec = sec

  def __str__(me):
    name = me.sec
    return "Section `%s' not found" % (name)
221
class MissingKeyException (ExpectedError):
  """Report that KEY has no value in (or reachable from) the section SEC."""

  def __init__(me, sec, key):
    me.sec, me.key = sec, key

  def __str__(me):
    details = (me.key, me.sec)
    return "Key `%s' not found in section `%s'" % details
228
class ConfigSection (object):
  """
  A section in a configuration parser.

  This is where a lot of the nitty-gritty stuff actually happens.  The
  `MyConfigParser' knows a lot about the internals of this class, which saves
  on building a complicated interface.
  """

  def __init__(me, name, cp):
    """Initialize a new, empty section with a given NAME and parent CP."""

    ## The cache maps item keys to entries, which consist of a pair of
    ## objects.  There are four possible states for a cache entry:
    ##
    ##   * missing -- there is no entry at all with this key, so we must
    ##     search for it;
    ##
    ##   * None, None -- we are actively trying to resolve this key, so if we
    ##     encounter this state, we have found a cycle in the inheritance
    ##     graph;
    ##
    ##   * None, [] -- we know that this key isn't reachable through any of
    ##     our parents;
    ##
    ##   * VALUE, PATH -- we know that the key resolves to VALUE, along the
    ##     PATH from us (exclusive) to the defining parent (inclusive).
    me.name = name
    me._itemmap = dict()
    me._cache = dict()
    me._cp = cp

  def _expand(me, string, resolvep):
    """
    Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING.

    RESOLVEP is a boolean switch: do we bother to tax the resolver or not?
    This is turned off by MyConfigParser's resolve() method while it's
    collecting hostnames to be resolved.
    """
    string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string)
    if resolvep:
      string = RX_RESOLVE.sub(
        lambda m: ' '.join(me._cp._resolver.lookup(m.group(2), m.group(1))),
        string)
    return string

  def _parents(me):
    """
    Yield this section's parents.

    The parents are named by the section's own `@inherit' item, as a list
    separated by commas and/or whitespace.  Looking up a parent which doesn't
    exist raises whatever the parser's section() method raises.
    """
    try: names = me._itemmap['@inherit']
    except KeyError: return
    for name in names.replace(',', ' ').split():
      yield me._cp.section(name)

  def _get(me, key, path = None):
    """
    Low-level option-fetching method.

    Fetch the value for the named KEY in this section, or maybe (recursively)
    a section which it inherits from.

    Returns a pair VALUE, PATH.  The value is not expanded; nor do we check
    for the special `name' key.  The caller is expected to do these things.
    If no value could be found then VALUE is None, and the PATH is not
    meaningful.

    Raises InheritanceCycleError if the inheritance graph loops back on
    itself, and AmbiguousOptionError if different parents provide different
    values.
    """

    ## If we weren't given a path, then we'd better make one.
    if path is None: path = []

    ## Extend the path to cover us, but remember to remove us again when
    ## we've finished.  If we need to pass the current path back upwards,
    ## then remember to take a copy.
    path.append(me.name)
    try:

      ## If we've been this way before on another pass through then return
      ## the value we found then.  If we're still thinking about it then
      ## we've found a cycle.
      try: v, p = me._cache[key]
      except KeyError: pass
      else:
        if p is None: raise InheritanceCycleError(key, path[:])
        else: return v, path + p

      ## See whether the answer is ready waiting for us.
      try: v = me._itemmap[key]
      except KeyError: pass
      else:
        p = path[:]
        me._cache[key] = v, []
        return v, p

      ## Initially we have no idea.
      value = None
      winner = []

      ## Go through our parents and ask them what they think.
      me._cache[key] = None, None
      try:
        for p in me._parents():

          ## See whether we get an answer.  If not, keep on going.
          v, pp = p._get(key, path)
          if v is None: continue

          ## If we got an answer, check that it matches any previous ones.
          if value is None:
            value = v
            winner = pp
          elif value != v:
            raise AmbiguousOptionError(key, winner, value, pp, v)
      except Exception:
        ## The search failed.  Don't leave the `still thinking' marker
        ## behind, or a later lookup of this key would be misreported as an
        ## inheritance cycle.
        me._cache.pop(key, None)
        raise

      ## That's the best we could manage.
      me._cache[key] = value, winner[len(path):]
      return value, winner

    finally:
      ## Remove us from the path again.
      path.pop()

  def get(me, key, resolvep = True):
    """
    Retrieve the value of KEY from this section.

    The value is expanded (see `_expand'); RESOLVEP says whether host names
    should be resolved too.  Raises MissingKeyException if there is no value.

    Two kinds of key are special: `name' defaults to the section's own name;
    and the inheritance list `@inherit' is fetched from this section only,
    and returned without expansion, which is how `_parents' reads it.
    """

    ## Special handling for the `name' key.
    if key == 'name':
      value = me._itemmap.get('name', me.name)

    ## Special handling for the inheritance list.  The key which `_parents'
    ## actually honours is `@inherit'; the spelling `@inherits' is still
    ## treated specially for compatibility with the previous behaviour.
    elif key in ('@inherit', '@inherits'):
      try: return me._itemmap[key]
      except KeyError: raise MissingKeyException(me.name, key)

    else:
      value, _ = me._get(key)
      if value is None:
        raise MissingKeyException(me.name, key)

    ## Expand the value and return it.
    return me._expand(value, resolvep)

  def items(me, resolvep = True):
    """
    Return an iterator over the item names in the section.

    This covers the names defined here and in all of the sections we inherit
    from, directly or indirectly, together with `name'.  The names come out
    in no particular order.  RESOLVEP is accepted but not used.
    """

    ## Initialize for a depth-first walk of the inheritance graph.
    seen = set(['name'])
    visiting = set([me.name])
    stack = [me]

    ## Visit nodes, collecting their keys.  Don't believe the values:
    ## resolving inheritance is too hard to do like this.
    while stack:
      sec = stack.pop()
      for p in sec._parents():
        if p.name not in visiting:
          stack.append(p); visiting.add(p.name)
      seen.update(sec._itemmap)

    ## And we're done.
    return iter(seen)
389
class MyConfigParser (object):
  """
  A more advanced configuration parser.

  This has four major enhancements over the standard ConfigParser which are
  relevant to us.

    * It recognizes `@inherit' keys and follows them when looking up a
      value.

    * It recognizes `$(VAR)' references to configuration variables during
      expansion and processes them correctly.

    * It recognizes `$FLAGS[HOST]' name-resolver requests and handles them
      correctly.  FLAGS consists of characters `4' (IPv4 addresses), and `*'
      (all addresses, space-separated, rather than just the first).

    * Its parsing behaviour is well-defined.

  Use:

    1. Call parse(FILE) on each open input file to slurp in the
       configuration data.

    2. Call resolve() to collect the hostnames which need to be resolved and
       actually do the name resolution.

    3. Call sections() to iterate over the configuration sections, or
       section(NAME) to find a named section.

    4. Call get(ITEM) on a section to collect the results, or items() to
       iterate over the item names.
  """

  def __init__(me):
    """
    Initialize a new, empty configuration parser.
    """
    me._resolver = BulkResolver()
    me._sectmap = dict()

  def parse(me, f):
    """
    Parse configuration from a file F.

    F is iterated over to obtain the lines, and its `name' attribute is used
    in error messages.  Raises ConfigSyntaxError if a line can't be
    understood, or turns up where it doesn't make sense.
    """

    ## Parser state: the current section, and the key and value buffer of the
    ## assignment which is still open for continuation lines.
    sect = None
    key = None
    val = None

    for lno, line in enumerate(f, 1):

      ## A comment or a blank line.  Nothing doing.  (This means that we
      ## leave out blank lines which look like they might be continuation
      ## lines.)
      if RX_COMMENT.match(line):
        continue

      ## A section header.  Commit any pending value and switch to the named
      ## section, making it if necessary.
      m = RX_GRPHDR.match(line)
      if m:
        if key is not None: sect._itemmap[key] = val.getvalue()
        name = m.group(1)
        sect = me._sectmap.get(name)
        if sect is None:
          sect = ConfigSection(name, me)
          me._sectmap[name] = sect
        key = None
        continue

      ## A new assignment.  Commit the old one, and start collecting this
      ## one.
      m = RX_ASSGN.match(line)
      if m:
        if sect is None:
          raise ConfigSyntaxError(f.name, lno, 'no active section to update')
        if key is not None: sect._itemmap[key] = val.getvalue()
        key = m.group(1)
        val = StringIO()
        val.write(m.group(2))
        continue

      ## A continuation line.  Accumulate the value.
      m = RX_CONT.match(line)
      if m:
        if key is None:
          raise ConfigSyntaxError(f.name, lno, 'no config value to continue')
        val.write('\n')
        val.write(m.group(1))
        continue

      ## Something else.
      raise ConfigSyntaxError(f.name, lno, 'incomprehensible line')

    ## Don't forget to commit any final value material.
    if key is not None: sect._itemmap[key] = val.getvalue()

  def section(me, name):
    """
    Return the ConfigSection with the given NAME.

    Raises MissingSectionException if there isn't one.
    """
    try:
      found = me._sectmap[name]
    except KeyError:
      raise MissingSectionException(name)
    return found

  def sections(me):
    """Return an iterator over the known sections."""
    return me._sectmap.itervalues()

  def resolve(me):
    """
    Works out all of the hostnames which need resolving and resolves them.

    Until you call this, attempts to fetch configuration items which need to
    resolve hostnames will fail!
    """
    prepare = me._resolver.prepare

    ## Expand every item without resolving anything, and register each
    ## hostname which is mentioned in the result.
    for sec in me.sections():
      for key in sec.items():
        text = sec.get(key, resolvep = False)
        for ref in RX_RESOLVE.finditer(text):
          prepare(ref.group(2))

    ## Now do all of the lookups at once.
    me._resolver.run()
520
521 ###--------------------------------------------------------------------------
522 ### Command-line handling.
523
def inputiter(things):
  """
  Iterate over command-line arguments, returning corresponding open files.

  THINGS is a list of names.  If none were given, or one is `-', assume
  standard input; if one is a directory, scan it for regular files other
  than backups (names ending in `~' or `#'); otherwise return the opened
  files.

  Directory entries are visited in sorted order, so that the outcome doesn't
  depend on the order in which the filesystem happens to list them.

  If no names were given and standard input is a terminal, then complain
  using `M.die'.
  """

  if not things:
    if OS.isatty(stdin.fileno()):
      M.die('no input given, and stdin is a terminal')
    yield stdin
    return

  for thing in things:
    if thing == '-':
      yield stdin
    elif OS.path.isdir(thing):
      for item in sorted(OS.listdir(thing)):

        ## Skip editor backup and auto-save files.
        if item.endswith(('~', '#')): continue

        ## Skip subdirectories and other things which aren't plain files.
        name = OS.path.join(thing, item)
        if not OS.path.isfile(name): continue

        yield open(name)
    else:
      yield open(thing)
551
def parse_options(argv = argv):
  """
  Parse command-line options, returning a pair (OPTS, ARGS).

  ARGV is the complete argument vector, program name included.  It is passed
  to the option parser whole, so the program name is still the first element
  of ARGS.
  """
  M.ego(argv[0])
  parser = OptionParser(
    usage = '%prog [-c CDB] INPUT...',
    version = '%%prog (tripe, version %s)' % VERSION)
  parser.add_option(
    '-c', '--cdb',
    dest = 'cdbfile', metavar = 'CDB', default = None,
    help = 'Compile output into a CDB file.')
  return parser.parse_args(argv)
564
565 ###--------------------------------------------------------------------------
566 ### Main code.
567
def getconf(args):
  """
  Read the configuration named by ARGS and return the accumulated result.

  Every input found by `inputiter' is parsed into a single parser, and then
  all of the hostnames mentioned are resolved before the parser is returned.
  """
  parser = MyConfigParser()
  for source in inputiter(args): parser.parse(source)
  parser.resolve()
  return parser
579
def output(conf, cdb):
  """
  Output the configuration information CONF to the database CDB.

  Sections are written in order of name.  Sections whose names begin with
  `@' are skipped; those beginning with `$' are written under their own
  names; and any other section NAME is a peer, written as `PNAME'.  A
  record's value is the URL-encoding of the section's items, leaving out
  those whose names begin with `@'.

  This is where the special `user' and `auto' database entries get set: a
  peer with a `user' item gets a `UUSER' record whose value is the peer's
  name; and the names of the peers whose `auto' item is affirmative are
  collected, space-separated, in the `%AUTO' record.
  """
  auto = []
  for sec in sorted(conf.sections(), key = lambda sec: sec.name):
    if sec.name.startswith('@'):
      continue
    elif sec.name.startswith('$'):
      label = sec.name
    else:
      label = 'P%s' % sec.name

      ## NOTE(review): the `auto' and `user' handling is taken to apply to
      ## peer sections only -- confirm against the original layout.
      try: a = sec.get('auto')
      except MissingKeyException: pass
      else:
        if a in ('y', 'yes', 't', 'true', '1', 'on'): auto.append(sec.name)

      ## Map the user name back to the peer.  (`add' needs a value as well as
      ## a key: supplying the key alone raises TypeError.)
      try: u = sec.get('user')
      except MissingKeyException: pass
      else: cdb.add('U%s' % u, sec.name)

    ## Encode the section's items, in a stable order.
    url = M.URLEncode(semip = True)
    for key in sorted(sec.items()):
      if not key.startswith('@'):
        url.encode(key, sec.get(key))
    cdb.add(label, url.result)

  cdb.add('%AUTO', ' '.join(auto))
  cdb.finish()
608
def main():
  """
  Main program.

  Read the configuration named on the command line and write it out: to a
  CDB file if `-c' was given, or as text on standard output otherwise.  An
  ExpectedError is reported as a message, and the program exits with status
  2.
  """
  opts, args = parse_options()

  ## Choose where the records are going to go.
  if not opts.cdbfile:
    cdb = CDBFake()
  else:
    cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new')

  ## The first element of ARGS is the program name, so skip it.
  try:
    output(getconf(args[1:]), cdb)
  except ExpectedError as e:
    M.moan(str(e))
    exit(2)
622
## Run the main program only when this file is executed as a script, and not
## when it's merely imported as a module.
if __name__ == '__main__':
  main()
625
626 ###----- That's all, folks --------------------------------------------------