peerdb/tripe-newpeers.in: Keep track of the canonical hostname too.
[tripe] / peerdb / tripe-newpeers.in
1 #! @PYTHON@
2 ### -*-python-*-
3 ###
4 ### Build a CDB file from configuration file
5 ###
6 ### (c) 2007 Straylight/Edgeware
7 ###
8
9 ###----- Licensing notice ---------------------------------------------------
10 ###
11 ### This file is part of Trivial IP Encryption (TrIPE).
12 ###
13 ### TrIPE is free software: you can redistribute it and/or modify it under
14 ### the terms of the GNU General Public License as published by the Free
15 ### Software Foundation; either version 3 of the License, or (at your
16 ### option) any later version.
17 ###
18 ### TrIPE is distributed in the hope that it will be useful, but WITHOUT
19 ### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20 ### FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 ### for more details.
22 ###
23 ### You should have received a copy of the GNU General Public License
24 ### along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
25
26 VERSION = '@VERSION@'
27
28 ###--------------------------------------------------------------------------
29 ### External dependencies.
30
31 import mLib as M
32 from optparse import OptionParser
33 import cdb as CDB
34 from sys import stdin, stdout, exit, argv
35 import re as RX
36 import os as OS
37 from cStringIO import StringIO
38
39 ###--------------------------------------------------------------------------
40 ### Utilities.
41
class CDBFake (object):
    """
    Stand-in for a cdbmake object which emits text rather than a database.

    Each record is written to FILE as a `KEY:VALUE' line, which is the
    format intended for cdb-map.
    """

    def __init__(me, file = stdout):
        """Remember the output FILE (standard output by default)."""
        me.file = file

    def add(me, key, value):
        """Write one record mapping KEY to VALUE."""
        record = '%s:%s\n' % (key, value)
        me.file.write(record)

    def finish(me):
        """Nothing to commit: records were written as they were added."""
        return None
50
class ExpectedError (Exception):
    """
    Base class for the failures this program reports itself.

    The main program catches these, prints the message, and exits with
    status 2 rather than letting a traceback escape.
    """
52
53 ###--------------------------------------------------------------------------
54 ### A bulk DNS resolver.
55
class ResolverFailure (ExpectedError):
    """Report that the hostname HOST could not be resolved, and why (MSG)."""

    def __init__(me, host, msg):
        me.host, me.msg = host, msg

    def __str__(me):
        details = (me.host, me.msg)
        return "failed to resolve `%s': %s" % details
62
class ResolvingHost (object):
    """
    A host name which is being looked up by a bulk-resolver instance.

    The object collects the addresses found for the name, or the reason
    the lookup failed.  Most notably, this is where the flag-handling logic
    lives for the $FLAGS[HOSTNAME] syntax.
    """

    def __init__(me, name):
        """Make a new resolving-host object for the host NAME."""
        me.name = name
        me.addr = []
        me.failure = None

    def addaddr(me, addr):
        """Record ADDR as one more address for this host."""
        me.addr.append(addr)

    def failed(me, msg):
        """
        Record that resolving this host failed; MSG is the human-readable
        reason, reported later by get().
        """
        me.failure = msg

    def get(me, flags):
        """
        Return a list of addresses according to the FLAGS string.

        The only flag is `*', meaning all of the addresses; without it, just
        the first address is returned.  Raises ResolverFailure if the lookup
        failed or found nothing, and ValueError for an unknown flag.
        """

        ## A recorded failure trumps everything else.
        if me.failure is not None:
            raise ResolverFailure(me.name, me.failure)

        ## Check the flags before looking at what we actually found.
        everything = False
        for flag in flags:
            if flag != '*':
                raise ValueError("unknown address-resolution flag `%s'" % flag)
            everything = True

        found = me.addr
        if not found:
            raise ResolverFailure(me.name, 'no matching addresses found')
        if everything:
            return found
        return [found[0]]
98
class BulkResolver (object):
    """
    Resolve a number of DNS names in parallel.

    The BulkResolver resolves a number of hostnames in parallel.  Using it
    works in three phases:

      1. You call prepare(HOSTNAME) a number of times, to feed in the
         hostnames you're interested in.

      2. You call run() to actually drive the resolver.

      3. You call lookup(HOSTNAME, FLAGS) to get the addresses you wanted.
         This fails with KeyError if HOSTNAME was never given to prepare(),
         and with whatever the host's get() method raises (ResolverFailure
         if the name couldn't be resolved).
    """

    def __init__(me):
        """Initialize the resolver."""
        me._namemap = {}
        me._noutstand = 0

    def prepare(me, name):
        """Prime the resolver to resolve the given host NAME."""

        ## Each name is only looked up once.
        if name in me._namemap:
            return

        host = ResolvingHost(name)
        me._namemap[name] = host

        ## Success and failure both funnel into _resolved; failure is
        ## signalled by an empty address list.
        def found(cname, alias, addr):
            return me._resolved(host, cname, addr)
        def lost():
            return me._resolved(host, None, [])

        host._resolv = M.SelResolveByName(name, found, lost)
        me._noutstand += 1

    def run(me):
        """Run the background DNS resolver until it's finished."""
        while me._noutstand:
            M.select()

    def lookup(me, name, flags):
        """Fetch the addresses for the host NAME, selected by FLAGS."""
        host = me._namemap[name]
        return host.get(flags)

    def _resolved(me, host, cname, addr):
        """
        Callback function: remember that ADDRs are the addresses for HOST.

        CNAME, if not None, replaces the host's name.  An empty ADDR means
        that the lookup failed.
        """
        if addr:
            if cname is not None:
                host.name = cname
            for a in addr:
                host.addaddr(a)
        else:
            host.failed('(unknown failure)')

        ## Either way, this lookup is finished with.
        host._resolv = None
        me._noutstand -= 1
147
148 ###--------------------------------------------------------------------------
149 ### The configuration parser.
150
## A line which is blank, or whose first non-space character starts a
## comment (`;' or `#').
RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])')

## A section group header `[NAME]'; group 1 is the NAME.
RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $')

## An assignment line `KEY = VALUE' or `KEY: VALUE', starting in the first
## column; group 1 is the KEY and group 2 the (possibly empty) VALUE, both
## without surrounding whitespace.
RX_ASSGN = RX.compile(r'''(?x) ^
    ([^\s:=] (?: [^:=]* [^\s:=])?)
    \s* [:=] \s*
    (| \S | \S.*\S)
    \s* $''')

## A continuation line, i.e., one which begins with whitespace; group 1 is
## the text without surrounding whitespace.
RX_CONT = RX.compile(r'''(?x) ^ \s+
    (| \S | \S.*\S)
    \s* $''')

## A $(VAR) configuration variable reference; group 1 is the VAR.
RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)')

## A $FLAGS[HOST] name resolution reference; group 1 are the flags (zero or
## more `*'); group 2 is the HOST.
RX_RESOLVE = RX.compile(r'(?x) \$ ([*]*) \[ ([^]]+) \]')
175
class ConfigSyntaxError (ExpectedError):
    """Report the problem MSG found at line LNO of the file FNAME."""

    def __init__(me, fname, lno, msg):
        me.fname, me.lno, me.msg = fname, lno, msg

    def __str__(me):
        where = (me.fname, me.lno, me.msg)
        return '%s:%d: %s' % where
183
184 def _fmt_path(path):
185 return ' -> '.join(["`%s'" % hop for hop in path])
186
class AmbiguousOptionError (ExpectedError):
    """
    Report that KEY has two different inherited values.

    PATHA leads to the value VALA, and PATHB to the different value VALB.
    """

    def __init__(me, key, patha, vala, pathb, valb):
        me.key = key
        me.patha = patha
        me.vala = vala
        me.pathb = pathb
        me.valb = valb

    def __str__(me):
        template = "Ambiguous answer resolving key `%s': " \
                   "path %s yields `%s' but %s yields `%s'"
        return template % (me.key,
                           _fmt_path(me.patha), me.vala,
                           _fmt_path(me.pathb), me.valb)
196
class InheritanceCycleError (ExpectedError):
    """Report that looking up KEY went round in a circle along PATH."""

    def __init__(me, key, path):
        me.key, me.path = key, path

    def __str__(me):
        cycle = _fmt_path(me.path)
        return "Found a cycle %s looking up key `%s'" % (cycle, me.key)
204
class MissingSectionException (ExpectedError):
    """Report that there is no section called SEC."""

    def __init__(me, sec):
        me.sec = sec

    def __str__(me):
        template = "Section `%s' not found"
        return template % (me.sec)
210
class MissingKeyException (ExpectedError):
    """Report that the section SEC provides no value for KEY."""

    def __init__(me, sec, key):
        me.sec, me.key = sec, key

    def __str__(me):
        details = (me.key, me.sec)
        return "Key `%s' not found in section `%s'" % details
217
class ConfigSection (object):
    """
    A section in a configuration parser.

    This is where a lot of the nitty-gritty stuff actually happens.  The
    `MyConfigParser' knows a lot about the internals of this class, which
    saves on building a complicated interface.
    """

    def __init__(me, name, cp):
        """Initialize a new, empty section with a given NAME and parent CP."""

        ## The cache maps item keys to entries, which consist of a pair of
        ## objects.  There are four possible states for a cache entry:
        ##
        ##   * missing -- there is no entry at all with this key, so we must
        ##     search for it;
        ##
        ##   * None, None -- we are actively trying to resolve this key, so
        ##     if we encounter this state, we have found a cycle in the
        ##     inheritance graph;
        ##
        ##   * None, [] -- we know that this key isn't reachable through any
        ##     of our parents;
        ##
        ##   * VALUE, PATH -- we know that the key resolves to VALUE, along
        ##     the PATH from us (exclusive) to the defining parent
        ##     (inclusive).
        me.name = name
        me._itemmap = dict()
        me._cache = dict()
        me._cp = cp

    def _expand(me, string, resolvep):
        """
        Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING.

        RESOLVEP is a boolean switch: do we bother to tax the resolver or
        not?  This is turned off by MyConfigParser's resolve() method while
        it's collecting hostnames to be resolved.
        """
        string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string)
        if resolvep:
            string = RX_RESOLVE.sub(
                lambda m: ' '.join(me._cp._resolver.lookup(m.group(2),
                                                           m.group(1))),
                string)
        return string

    def _parents(me):
        """
        Yield this section's parents.

        The parents are the sections named, separated by commas or
        whitespace, in this section's own `@inherit' item.
        """
        try: names = me._itemmap['@inherit']
        except KeyError: return
        for name in names.replace(',', ' ').split():
            yield me._cp.section(name)

    def _get(me, key, path = None):
        """
        Low-level option-fetching method.

        Fetch the value for the named KEY in this section, or maybe
        (recursively) a section which it inherits from.  PATH is the list of
        section names visited so far; it is extended while we work and
        restored before we return.

        Returns a pair VALUE, PATH.  The value is not expanded; nor do we
        check for the special `name' key.  The caller is expected to do
        these things.  VALUE is None if no value could be found.

        Raises InheritanceCycleError if the inheritance graph loops back on
        itself, and AmbiguousOptionError if two parents disagree about the
        value.
        """

        ## If we weren't given a path, then we'd better make one.
        if path is None: path = []

        ## Extend the path to cover us, but remember to remove us again when
        ## we've finished.  If we need to pass the current path back
        ## upwards, then remember to take a copy.
        path.append(me.name)
        try:

            ## If we've been this way before on another pass through then
            ## return the value we found then.  If we're still thinking
            ## about it then we've found a cycle.
            try: v, p = me._cache[key]
            except KeyError: pass
            else:
                if p is None: raise InheritanceCycleError(key, path[:])
                else: return v, path + p

            ## See whether the answer is ready waiting for us.
            try: v = me._itemmap[key]
            except KeyError: pass
            else:
                p = path[:]
                me._cache[key] = v, []
                return v, p

            ## Initially we have no idea.
            value = None
            winner = []

            ## Go through our parents and ask them what they think.  While
            ## we do, the cache holds the `in progress' marker so that a
            ## cycle gets noticed.  If the search is abandoned because of an
            ## error, take the marker away again: otherwise a later lookup
            ## of the same key would be misreported as an inheritance cycle.
            me._cache[key] = None, None
            try:
                for p in me._parents():

                    ## See whether we get an answer.  If not, keep on going.
                    v, pp = p._get(key, path)
                    if v is None: continue

                    ## If we got an answer, check that it matches any
                    ## previous ones.
                    if value is None:
                        value = v
                        winner = pp
                    elif value != v:
                        raise AmbiguousOptionError(key, winner, value, pp, v)
            except BaseException:
                me._cache.pop(key, None)
                raise

            ## That's the best we could manage.  The cached path is relative
            ## to us, so trim off the part which got us here.
            me._cache[key] = value, winner[len(path):]
            return value, winner

        finally:
            ## Remove us from the path again.
            path.pop()

    def get(me, key, resolvep = True):
        """
        Retrieve the value of KEY from this section.

        The value is looked up here and then in inherited sections, and has
        its placeholders expanded; hostnames are only resolved if RESOLVEP
        is true.  Raises MissingKeyException if there is no value.
        """

        ## Special handling for the `name' key: it defaults to the section
        ## name, and is never inherited.
        if key == 'name':
            value = me._itemmap.get('name', me.name)

        ## NOTE(review): this special case is spelt `@inherits', but
        ## `_parents' reads the `@inherit' item -- confirm which spelling is
        ## intended.  As written, this returns the section's own item,
        ## unexpanded and without consulting parents.
        elif key == '@inherits':
            try: return me._itemmap['@inherits']
            except KeyError: raise MissingKeyException(me.name, key)

        else:
            value, _ = me._get(key)
            if value is None:
                raise MissingKeyException(me.name, key)

        ## Expand the value and return it.
        return me._expand(value, resolvep)

    def items(me, resolvep = True):
        """
        Return an iterator over the item names in the section.

        This covers the names defined here and in every section we inherit
        from, directly or indirectly, and always includes `name'.  RESOLVEP
        is accepted but has no effect.
        """

        ## Initialize for a depth-first walk of the inheritance graph.
        seen = { 'name': True }
        visiting = { me.name: True }
        stack = [me]

        ## Visit nodes, collecting their keys.  Don't believe the values:
        ## resolving inheritance is too hard to do like this.
        while stack:
            sec = stack.pop()
            for p in sec._parents():
                if p.name not in visiting:
                    stack.append(p); visiting[p.name] = True

            for key in sec._itemmap: seen[key] = None

        ## And we're done.
        return iter(seen)
378
class MyConfigParser (object):
    """
    A more advanced configuration parser.

    This has four major enhancements over the standard ConfigParser which
    are relevant to us.

      * It follows section inheritance when expanding a value.
        NOTE(review): this text used to name the key `@inherits', which is
        also what ConfigSection.get special-cases, but
        ConfigSection._parents reads `@inherit' -- confirm the spelling.

      * It recognizes `$(VAR)' references to configuration variables during
        expansion and processes them correctly.

      * It recognizes `$FLAGS[HOST]' name-resolver requests and handles
        them correctly.  FLAGS may be empty, or `*' (all addresses,
        space-separated, rather than just the first).

      * Its parsing behaviour is well-defined.

    Use:

      1. Call parse(FILE) on each open file to slurp in the configuration
         data.

      2. Call resolve() to collect the hostnames which need to be resolved
         and actually do the name resolution.

      3. Call sections() to get the configuration sections, or
         section(NAME) to find a named section.

      4. Call get(ITEM) on a section to collect the results, or items() to
         iterate over them.
    """

    def __init__(me):
        """Initialize a new, empty configuration parser."""
        me._sectmap = dict()
        me._resolver = BulkResolver()

    def parse(me, f):
        """
        Parse configuration from a file F.

        F is iterated over a line at a time; its `name' attribute is used in
        error reports.  Raises ConfigSyntaxError for a line which can't be
        understood, or which turns up in the wrong place.
        """

        ## Parser state: the section being filled in, and the key and
        ## pieces (one per line) of the assignment being accumulated.
        sect = None
        key = None
        pieces = None
        lno = 0

        ## Commit a key's value once we've determined that there are no
        ## further continuation lines.
        def store(sect, key, pieces):
            if key is not None:
                sect._itemmap[key] = '\n'.join(pieces)

        for line in f:
            lno += 1

            ## A comment or a blank line.  Nothing doing.  (This means that
            ## blank lines which look like they might be continuation lines
            ## are left out.)
            if RX_COMMENT.match(line):
                continue

            ## A section header.  Flush out any previous value and set up
            ## the new group, reusing the section if we've met it before.
            m = RX_GRPHDR.match(line)
            if m:
                store(sect, key, pieces)
                name = m.group(1)
                sect = me._sectmap.get(name)
                if sect is None:
                    sect = me._sectmap[name] = ConfigSection(name, me)
                key = None
                continue

            ## A new assignment.  Flush out the old one, and set up to
            ## store this one.
            m = RX_ASSGN.match(line)
            if m:
                if sect is None:
                    raise ConfigSyntaxError(f.name, lno,
                                            'no active section to update')
                store(sect, key, pieces)
                key = m.group(1)
                pieces = [m.group(2)]
                continue

            ## A continuation line.  Accumulate the value.
            m = RX_CONT.match(line)
            if m:
                if key is None:
                    raise ConfigSyntaxError(f.name, lno,
                                            'no config value to continue')
                pieces.append(m.group(1))
                continue

            ## Something else.
            raise ConfigSyntaxError(f.name, lno, 'incomprehensible line')

        ## Don't forget to commit any final value material.
        store(sect, key, pieces)

    def section(me, name):
        """
        Return the ConfigSection with the given NAME.

        Raises MissingSectionException if there isn't one.
        """
        try:
            found = me._sectmap[name]
        except KeyError:
            raise MissingSectionException(name)
        return found

    def sections(me):
        """Return an iterator over the known sections."""
        return iter(me._sectmap.values())

    def resolve(me):
        """
        Works out all of the hostnames which need resolving and resolves
        them.

        Until you call this, attempts to fetch configuration items which
        need to resolve hostnames will fail!
        """

        ## Fetch every item without resolving anything, and hand each
        ## hostname mentioned to the resolver.
        for sec in me.sections():
            for key in sec.items():
                text = sec.get(key, resolvep = False)
                for ref in RX_RESOLVE.finditer(text):
                    me._resolver.prepare(ref.group(2))

        ## Now do all of the lookups in one go.
        me._resolver.run()
509
510 ###--------------------------------------------------------------------------
511 ### Command-line handling.
512
def inputiter(things):
    """
    Iterate over command-line arguments, returning corresponding open files.

    If none were given, or one is `-', assume standard input; if one is a
    directory, scan it for files other than backups (names ending in `~' or
    `#'); otherwise return the opened files.
    """

    ## No arguments at all: read standard input, unless that's hopeless.
    if not things:
        if OS.isatty(stdin.fileno()):
            M.die('no input given, and stdin is a terminal')
        yield stdin
        return

    for thing in things:

        ## An explicit request for standard input.
        if thing == '-':
            yield stdin
            continue

        ## A plain filename.
        if not OS.path.isdir(thing):
            yield open(thing)
            continue

        ## A directory: pick out the regular files which aren't backups.
        for item in OS.listdir(thing):
            if item.endswith(('~', '#')):
                continue
            name = OS.path.join(thing, item)
            if OS.path.isfile(name):
                yield open(name)
540
def parse_options(argv = argv):
    """
    Parse command-line options, returning a pair (OPTS, ARGS).

    ARGV is the complete argument vector, program name included; since it
    is handed to the option parser whole, the program name is still the
    first element of ARGS.
    """
    M.ego(argv[0])

    usage = '%prog [-c CDB] INPUT...'
    version = '%%prog (tripe, version %s)' % VERSION
    parser = OptionParser(usage = usage, version = version)
    parser.add_option('-c', '--cdb', metavar = 'CDB',
                      dest = 'cdbfile', default = None,
                      help = 'Compile output into a CDB file.')

    return parser.parse_args(argv)
553
554 ###--------------------------------------------------------------------------
555 ### Main code.
556
def getconf(args):
    """
    Read the configuration files and return the accumulated result.

    ARGS are the input names, as understood by inputiter.  All of the
    hostnames mentioned have been through the resolver by the time we
    return.
    """
    parser = MyConfigParser()
    for infile in inputiter(args):
        parser.parse(infile)
    parser.resolve()
    return parser
568
def output(conf, cdb):
    """
    Output the configuration information CONF to the database CDB.

    CDB must provide add(KEY, VALUE) and finish().  Sections are written in
    order of name:

      * sections whose names begin `@' are skipped;

      * sections whose names begin `$' are stored under their own names;

      * any other section is a peer, stored as `P' followed by its name.

    Each stored record is the URL-encoding of the section's items, leaving
    out those whose names begin `@'.

    This is where the special `user' and `auto' database entries get set:
    a peer with a `user' item gets a record `U' followed by the user, whose
    value is the peer's name; and `%AUTO' lists the peers whose `auto' item
    is affirmative.
    """
    auto = []
    for sec in sorted(conf.sections(), key = lambda sec: sec.name):
        if sec.name.startswith('@'):
            continue
        elif sec.name.startswith('$'):
            label = sec.name
        else:
            label = 'P%s' % sec.name

            ## NOTE(review): the copy of the source this was reviewed from
            ## had lost its indentation; the `auto' and `user' handling is
            ## taken to apply to peer sections only -- confirm.
            try: a = sec.get('auto')
            except MissingKeyException: pass
            else:
                if a in ('y', 'yes', 't', 'true', '1', 'on'):
                    auto.append(sec.name)

            ## Map the user back to the peer.  (This used to call `add' with
            ## the key alone, which can't work: `add' needs a value too.)
            try: u = sec.get('user')
            except MissingKeyException: pass
            else: cdb.add('U%s' % u, sec.name)

        url = M.URLEncode(semip = True)
        for key in sorted(sec.items()):
            if not key.startswith('@'):
                url.encode(key, sec.get(key))
        cdb.add(label, url.result)

    cdb.add('%AUTO', ' '.join(auto))
    cdb.finish()
597
def main():
    """
    Main program.

    Parse the command line, read the configuration, and write the result,
    either as a CDB file (if `-c' was given) or as text on standard output.
    An ExpectedError is reported and turned into exit status 2.
    """
    opts, args = parse_options()

    ## Decide where the output is going.
    if not opts.cdbfile:
        cdb = CDBFake()
    else:
        cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new')

    ## The first argument is the program name, so skip it.
    try:
        output(getconf(args[1:]), cdb)
    except ExpectedError as e:
        M.moan(str(e))
        exit(2)

if __name__ == '__main__':
    main()
614
615 ###----- That's all, folks --------------------------------------------------