peerdb/tripe-newpeers.in: Keep track of the canonical hostname too.
[tripe] / peerdb / tripe-newpeers.in
CommitLineData
6005ef9b
MW
1#! @PYTHON@
2### -*-python-*-
3###
4### Build a CDB file from configuration file
5###
6### (c) 2007 Straylight/Edgeware
7###
8
9###----- Licensing notice ---------------------------------------------------
10###
11### This file is part of Trivial IP Encryption (TrIPE).
12###
11ad66c2
MW
13### TrIPE is free software: you can redistribute it and/or modify it under
14### the terms of the GNU General Public License as published by the Free
15### Software Foundation; either version 3 of the License, or (at your
16### option) any later version.
6005ef9b 17###
11ad66c2
MW
18### TrIPE is distributed in the hope that it will be useful, but WITHOUT
19### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20### FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21### for more details.
6005ef9b
MW
22###
23### You should have received a copy of the GNU General Public License
11ad66c2 24### along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
6005ef9b
MW
25
26VERSION = '@VERSION@'
27
28###--------------------------------------------------------------------------
29### External dependencies.
30
6005ef9b
MW
31import mLib as M
32from optparse import OptionParser
33import cdb as CDB
34from sys import stdin, stdout, exit, argv
35import re as RX
36import os as OS
b7e5aa06 37from cStringIO import StringIO
6005ef9b
MW
38
39###--------------------------------------------------------------------------
40### Utilities.
41
class CDBFake (object):
  """
  Stand-in for `cdbmake' which just writes text suitable for cdb-map.

  Each record is written as a single `KEY:VALUE' line.
  """

  def __init__(me, file = stdout):
    """Arrange to write records to FILE (standard output by default)."""
    me.file = file

  def add(me, key, value):
    """Write one record associating KEY with VALUE."""
    record = '%s:%s\n' % (key, value)
    me.file.write(record)

  def finish(me):
    """Finish the output; nothing needs doing for plain text."""
    return None
50
1c4623dd
MW
class ExpectedError (Exception):
  """
  Base class for the failures which `main' catches and reports as a plain
  message.
  """
52
6005ef9b
MW
53###--------------------------------------------------------------------------
54### A bulk DNS resolver.
55
class ResolverFailure (ExpectedError):
  """Report that the host name HOST couldn't be resolved, for reason MSG."""

  def __init__(me, host, msg):
    me.host, me.msg = host, msg

  def __str__(me):
    fields = (me.host, me.msg)
    return "failed to resolve `%s': %s" % fields
62
660564a1
MW
class ResolvingHost (object):
  """
  A host name which is being looked up by a bulk-resolver instance.

  The object collects the addresses found for the name (or the reason why
  none could be found), and implements the flag handling for the
  $FLAGS[HOSTNAME] syntax.
  """

  def __init__(me, name):
    """Make a new resolving-host object for the host NAME."""
    me.name = name
    me.addr = []
    me.failure = None

  def addaddr(me, addr):
    """Record ADDR as one more address for this host."""
    me.addr.append(addr)

  def failed(me, msg):
    """
    Record that resolving this host failed; MSG is a human-readable reason.
    """
    me.failure = msg

  def get(me, flags):
    """
    Return a list of this host's addresses, as selected by the FLAGS string.

    The only flag understood is `*', which asks for all of the addresses;
    without it, only the first address is returned.  Raises
    `ResolverFailure' if resolution failed or produced no addresses, and
    `ValueError' if FLAGS contains an unknown flag character.
    """

    ## A recorded failure trumps everything else.
    if me.failure is not None:
      raise ResolverFailure(me.name, me.failure)

    ## Work through the flags, complaining about any we don't recognize.
    want_all = False
    for flag in flags:
      if flag != '*':
        raise ValueError("unknown address-resolution flag `%s'" % flag)
      want_all = True

    ## Pick out the addresses which were asked for.
    found = me.addr
    if not found:
      raise ResolverFailure(me.name, 'no matching addresses found')
    if want_all:
      return found
    return [found[0]]
660564a1 98
6005ef9b
MW
class BulkResolver (object):
  """
  Resolve a number of DNS names in parallel.

  Using a BulkResolver works in three phases:

    1. You call prepare(HOSTNAME) a number of times, to feed in the
       hostnames you're interested in.

    2. You call run() to actually drive the resolver.

    3. You call lookup(HOSTNAME, FLAGS) to get the addresses you wanted.
       This raises `KeyError' if HOSTNAME was never given to prepare();
       otherwise the answer (or a `ResolverFailure') comes from the host
       object's get() method.
  """

  def __init__(me):
    """Initialize the resolver, with no names and nothing outstanding."""
    me._namemap = {}
    me._noutstand = 0

  def prepare(me, name):
    """
    Prime the resolver to resolve the given host NAME.

    Names which have already been prepared are quietly ignored.
    """
    if name in me._namemap:
      return
    host = ResolvingHost(name)
    me._namemap[name] = host

    ## Both outcomes are funnelled through `_resolved'; a failure is
    ## reported as an empty address list.
    def found(cname, alias, addr):
      return me._resolved(host, cname, addr)
    def lost():
      return me._resolved(host, None, [])

    host._resolv = M.SelResolveByName(name, found, lost)
    me._noutstand += 1

  def run(me):
    """Run the background DNS resolver until no lookups are outstanding."""
    while me._noutstand:
      M.select()

  def lookup(me, name, flags):
    """Fetch the addresses for the host NAME, as selected by FLAGS."""
    host = me._namemap[name]
    return host.get(flags)

  def _resolved(me, host, cname, addr):
    """
    Callback function: remember that ADDRs are the addresses for HOST.

    An empty ADDR list marks HOST as having failed.  Otherwise, if CNAME
    isn't None, it replaces the HOST's recorded name.
    """
    if addr:
      if cname is not None:
        host.name = cname
      for a in addr:
        host.addaddr(a)
    else:
      host.failed('(unknown failure)')

    ## Either way, this lookup is finished.
    host._resolv = None
    me._noutstand -= 1
6005ef9b
MW
147
148###--------------------------------------------------------------------------
149### The configuration parser.
150
b7e5aa06
MW
## Match a comment or empty line: optional leading whitespace followed by
## the end of the line, or by `;' or `#'.
RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])')

## Match a section group header; group 1 is the text between the brackets.
RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $')

## Match an assignment line.  Group 1 is the key, which must start in the
## first column and contains no `:' or `='; group 2 is the (possibly empty)
## value.  Neither group includes the whitespace around the delimiter or at
## the end of the line.
RX_ASSGN = RX.compile(r'''(?x) ^
 ([^\s:=] (?: [^:=]* [^\s:=])?)
 \s* [:=] \s*
 (| \S | \S.*\S)
 \s* $''')

## Match a continuation line, i.e., one which begins with whitespace; group 1
## is the content with the surrounding whitespace removed.
RX_CONT = RX.compile(r'''(?x) ^ \s+
 (| \S | \S.*\S)
 \s* $''')

## Match a $(VAR) configuration variable reference; group 1 is the VAR.
RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)')

## Match a $FLAGS[HOST] name resolution reference; group 1 is the (possibly
## empty) string of `*' flags; group 2 is the HOST.
RX_RESOLVE = RX.compile(r'(?x) \$ ([*]*) \[ ([^]]+) \]')
6005ef9b 175
class ConfigSyntaxError (ExpectedError):
  """Report the problem MSG found at line LNO of the file called FNAME."""

  def __init__(me, fname, lno, msg):
    me.fname, me.lno, me.msg = fname, lno, msg

  def __str__(me):
    where = (me.fname, me.lno, me.msg)
    return '%s:%d: %s' % where
183
bd3db76c
MW
184def _fmt_path(path):
185 return ' -> '.join(["`%s'" % hop for hop in path])
186
class AmbiguousOptionError (ExpectedError):
  """
  Report that KEY resolves to different values along different paths.

  PATHA yields the value VALA, whereas PATHB yields VALB.
  """

  def __init__(me, key, patha, vala, pathb, valb):
    me.key = key
    me.patha = patha
    me.vala = vala
    me.pathb = pathb
    me.valb = valb

  def __str__(me):
    template = ("Ambiguous answer resolving key `%s': "
                "path %s yields `%s' but %s yields `%s'")
    first = _fmt_path(me.patha)
    second = _fmt_path(me.pathb)
    return template % (me.key, first, me.vala, second, me.valb)
196
class InheritanceCycleError (ExpectedError):
  """Report that looking up KEY led around the cycle described by PATH."""

  def __init__(me, key, path):
    me.key, me.path = key, path

  def __str__(me):
    cycle = _fmt_path(me.path)
    return "Found a cycle %s looking up key `%s'" % (cycle, me.key)
204
class MissingSectionException (ExpectedError):
  """Report that there is no section named SEC."""

  def __init__(me, sec):
    me.sec = sec

  def __str__(me):
    name = me.sec
    return "Section `%s' not found" % (name)
210
class MissingKeyException (ExpectedError):
  """Report that KEY couldn't be found in the section named SEC."""

  def __init__(me, sec, key):
    me.sec, me.key = sec, key

  def __str__(me):
    fields = (me.key, me.sec)
    return "Key `%s' not found in section `%s'" % fields
217
e3ec3a3a
MW
class ConfigSection (object):
  """
  A section in a configuration parser.

  This is where a lot of the nitty-gritty stuff actually happens.  The
  `MyConfigParser' knows a lot about the internals of this class, which saves
  on building a complicated interface.
  """

  def __init__(me, name, cp):
    """Initialize a new, empty section with a given NAME and parent CP."""

    ## The cache maps item keys to entries, which consist of a pair of
    ## objects.  There are four possible states for a cache entry:
    ##
    ##   * missing -- there is no entry at all with this key, so we must
    ##     search for it;
    ##
    ##   * None, None -- we are actively trying to resolve this key, so if we
    ##     encounter this state, we have found a cycle in the inheritance
    ##     graph;
    ##
    ##   * None, [] -- we know that this key isn't reachable through any of
    ##     our parents;
    ##
    ##   * VALUE, PATH -- we know that the key resolves to VALUE, along the
    ##     PATH from us (exclusive) to the defining parent (inclusive).
    me.name = name
    me._itemmap = dict()
    me._cache = dict()
    me._cp = cp

  def _expand(me, string, resolvep):
    """
    Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING.

    RESOLVEP is a boolean switch: do we bother to tax the resolver or not?
    This is turned off by MyConfigParser's resolve() method while it's
    collecting hostnames to be resolved.
    """
    string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string)
    if resolvep:
      string = RX_RESOLVE.sub(
        lambda m: ' '.join(me._cp._resolver.lookup(m.group(2), m.group(1))),
        string)
    return string

  def _parents(me):
    """
    Yield this section's parents.

    The parents are named by our own `@inherit' item, separated by commas
    and/or whitespace; a name which doesn't match a known section causes the
    parser's section() method to raise an exception.
    """
    try: names = me._itemmap['@inherit']
    except KeyError: return
    for name in names.replace(',', ' ').split():
      yield me._cp.section(name)

  def _get(me, key, path = None):
    """
    Low-level option-fetching method.

    Fetch the value for the named KEY in this section, or maybe (recursively)
    a section which it inherits from.  PATH is the list of section names
    visited so far; it is extended while we work, and restored before we
    return.

    Returns a pair VALUE, PATH.  The value is not expanded; nor do we check
    for the special `name' key.  The caller is expected to do these things.
    VALUE is None if no value could be found, and PATH is then not
    meaningful.

    Raises `InheritanceCycleError' if the inheritance graph loops back on
    itself, and `AmbiguousOptionError' if different parents disagree about
    the value.
    """

    ## If we weren't given a path, then we'd better make one.
    if path is None: path = []

    ## Extend the path to cover us, but remember to remove us again when
    ## we've finished.  If we need to pass the current path back upwards,
    ## then remember to take a copy.
    path.append(me.name)
    try:

      ## If we've been this way before on another pass through then return
      ## the value we found then.  If we're still thinking about it then
      ## we've found a cycle.
      try: v, p = me._cache[key]
      except KeyError: pass
      else:
        if p is None: raise InheritanceCycleError(key, path[:])
        else: return v, path + p

      ## See whether the answer is ready waiting for us.
      try: v = me._itemmap[key]
      except KeyError: pass
      else:
        p = path[:]
        me._cache[key] = v, []
        return v, p

      ## Initially we have no idea.
      value = None
      winner = []

      ## Go through our parents and ask them what they think.  While we do
      ## this, the cache holds an `in progress' marker so that cycles are
      ## noticed.  If the search fails with an exception then the marker
      ## must be withdrawn: otherwise a later lookup of the same key would
      ## find it and report a cycle which isn't there.
      me._cache[key] = None, None
      finished = False
      try:
        for p in me._parents():

          ## See whether we get an answer.  If not, keep on going.
          v, pp = p._get(key, path)
          if v is None: continue

          ## If we got an answer, check that it matches any previous ones.
          if value is None:
            value = v
            winner = pp
          elif value != v:
            raise AmbiguousOptionError(key, winner, value, pp, v)
        finished = True
      finally:
        if not finished: me._cache.pop(key, None)

      ## That's the best we could manage.  Cache the path relative to us.
      me._cache[key] = value, winner[len(path):]
      return value, winner

    finally:
      ## Remove us from the path again.
      path.pop()

  def get(me, key, resolvep = True):
    """
    Retrieve the value of KEY from this section.

    The value is expanded before being returned; if RESOLVEP is false then
    $FLAGS[...] references are left alone.  Raises `MissingKeyException' if
    the KEY can't be found.
    """

    ## Special handling for the `name' key.
    if key == 'name':
      value = me._itemmap.get('name', me.name)

    ## NOTE(review): `_parents' reads the key `@inherit' (no `s'), so this
    ## branch is only reached for a key literally spelled `@inherits'.
    ## Confirm which spelling is intended before changing either.
    elif key == '@inherits':
      try: return me._itemmap['@inherits']
      except KeyError: raise MissingKeyException(me.name, key)

    else:
      value, _ = me._get(key)
      if value is None:
        raise MissingKeyException(me.name, key)

    ## Expand the value and return it.
    return me._expand(value, resolvep)

  def items(me, resolvep = True):
    """
    Return an iterator over the item names in the section.

    This covers the keys set in this section and in every section reachable
    through inheritance, and always includes `name'.  RESOLVEP is accepted
    but not used.
    """

    ## Initialize for a depth-first walk of the inheritance graph.
    seen = { 'name': True }
    visiting = { me.name: True }
    stack = [me]

    ## Visit nodes, collecting their keys.  Don't believe the values:
    ## resolving inheritance is too hard to do like this.
    while stack:
      sec = stack.pop()
      for p in sec._parents():
        if p.name not in visiting:
          stack.append(p); visiting[p.name] = True

      for key in sec._itemmap: seen[key] = None

    ## And we're done.
    return iter(seen)
e3ec3a3a 378
class MyConfigParser (object):
  """
  A more advanced configuration parser.

  This has four major enhancements over the standard ConfigParser which are
  relevant to us.

    * It recognizes `@inherit' keys and follows them when expanding a
      value.

    * It recognizes `$(VAR)' references to configuration variables during
      expansion and processes them correctly.

    * It recognizes `$FLAGS[HOST]' name-resolver requests and handles them
      correctly.  FLAGS may be empty, or `*' (all addresses, space-separated,
      rather than just the first).

    * Its parsing behaviour is well-defined.

  Use:

    1. Call parse(F) on each open file to slurp in the configuration data.

    2. Call resolve() to collect the hostnames which need to be resolved and
       actually do the name resolution.

    3. Call sections() to get a list of the configuration sections, or
       section(NAME) to find a named section.

    4. Call get(ITEM) on a section to collect the results, or items() to
       iterate over them.
  """

  def __init__(me):
    """
    Initialize a new, empty configuration parser.
    """
    me._sectmap = dict()
    me._resolver = BulkResolver()

  def parse(me, f):
    """
    Parse configuration from a file F.

    F is iterated over to obtain its lines.  Its `name' attribute, if it has
    one, is used to label syntax errors.  Raises `ConfigSyntaxError' if a
    line can't be understood.
    """

    ## Find a name for diagnostics.  Not every file-like object has one, and
    ## a missing name mustn't hide the real complaint.
    fname = getattr(f, 'name', '<input>')

    ## Initial parser state.  While a value is being collected, KEY is its
    ## name and PARTS holds its lines.
    sect = None
    key = None
    parts = None
    lno = 0

    ## Commit a key's value when we've determined that there are no further
    ## continuation lines.
    def flush():
      if key is not None: sect._itemmap[key] = '\n'.join(parts)

    ## Work through all of the input lines.
    for line in f:
      lno += 1

      ## A comment or a blank line.  Nothing doing.  (This means that we
      ## leave out blank lines which look like they might be continuation
      ## lines.)
      if RX_COMMENT.match(line):
        continue

      ## A section header.  Flush out any previous value and set up the new
      ## group.
      m = RX_GRPHDR.match(line)
      if m:
        flush()
        name = m.group(1)
        try: sect = me._sectmap[name]
        except KeyError: sect = me._sectmap[name] = ConfigSection(name, me)
        key = None
        continue

      ## A new assignment.  Flush out the old one, and set up to store this
      ## one.
      m = RX_ASSGN.match(line)
      if m:
        if sect is None:
          raise ConfigSyntaxError(fname, lno, 'no active section to update')
        flush()
        key = m.group(1)
        parts = [m.group(2)]
        continue

      ## A continuation line.  Accumulate the value.
      m = RX_CONT.match(line)
      if m:
        if key is None:
          raise ConfigSyntaxError(fname, lno, 'no config value to continue')
        parts.append(m.group(1))
        continue

      ## Something else.
      raise ConfigSyntaxError(fname, lno, 'incomprehensible line')

    ## Don't forget to commit any final value material.
    flush()

  def section(me, name):
    """
    Return a ConfigSection with the given NAME.

    Raises `MissingSectionException' if there isn't one.
    """
    try: return me._sectmap[name]
    except KeyError: raise MissingSectionException(name)

  def sections(me):
    """Return an iterator over the known sections."""
    return iter(me._sectmap.values())

  def resolve(me):
    """
    Works out all of the hostnames which need resolving and resolves them.

    Until you call this, attempts to fetch configuration items which need to
    resolve hostnames will fail!
    """

    ## Expand every item with resolution switched off, and hand each
    ## $FLAGS[HOST] reference which survives to the resolver.
    for sec in me.sections():
      for key in sec.items():
        value = sec.get(key, resolvep = False)
        for match in RX_RESOLVE.finditer(value):
          me._resolver.prepare(match.group(2))

    ## Now wait for all of the answers.
    me._resolver.run()
509
6005ef9b
MW
510###--------------------------------------------------------------------------
511### Command-line handling.
512
def inputiter(things):
  """
  Iterate over command-line arguments, returning corresponding open files.

  If none were given, or one is `-', assume standard input; if one is a
  directory, scan it for files other than backups; otherwise return the
  opened files.

  Backups are entries whose names end in `~' or `#'.  Directory entries
  which aren't regular files are skipped.  The entries of a directory are
  visited in sorted order, so that the result of reading a directory doesn't
  depend on the order in which the filesystem happens to list it.
  """

  ## No arguments at all: read standard input, unless that's a terminal.
  if not things:
    if OS.isatty(stdin.fileno()):
      M.die('no input given, and stdin is a terminal')
    yield stdin
    return

  for thing in things:
    if thing == '-':
      yield stdin
    elif OS.path.isdir(thing):
      for item in sorted(OS.listdir(thing)):
        if item.endswith('~') or item.endswith('#'):
          continue
        name = OS.path.join(thing, item)
        if OS.path.isfile(name):
          yield open(name)
    else:
      yield open(thing)
540
def parse_options(argv = argv):
  """
  Parse command-line options, returning a pair (OPTS, ARGS).

  ARGV is the complete argument vector: its first element is used to set
  the program name, and is handed to the option parser along with the rest.
  """
  M.ego(argv[0])

  usage = '%prog [-c CDB] INPUT...'
  version = '%%prog (tripe, version %s)' % VERSION
  op = OptionParser(usage = usage, version = version)
  op.add_option('-c', '--cdb',
                dest = 'cdbfile', metavar = 'CDB', default = None,
                help = 'Compile output into a CDB file.')

  parsed = op.parse_args(argv)
  opts, args = parsed
  return opts, args
553
554###--------------------------------------------------------------------------
555### Main code.
556
def getconf(args):
  """
  Read the configuration files and return the accumulated result.

  ARGS is the list of input arguments, as understood by `inputiter'.  We
  make sure that all hostnames have been properly resolved.
  """
  conf = MyConfigParser()
  for f in inputiter(args):
    try:
      conf.parse(f)
    finally:
      ## Release each input file as soon as we've finished with it -- but
      ## standard input isn't ours to close.
      if f is not stdin: f.close()
  conf.resolve()
  return conf
568
def output(conf, cdb):
  """
  Output the configuration information CONF to the database CDB.

  Sections are written in order of name.  Sections whose names begin with
  `@' are not written at all; a section whose name begins with `$' is
  written under its own name; any other section NAME is written as `PNAME'.
  The record's value is the URL-encoding of the section's items, leaving out
  those whose keys begin with `@'.

  This is where the special `user' and `auto' database entries get set: a
  section with a `user' item USER gets a record `UUSER' whose value is the
  section name, and the names of the sections whose `auto' item is set to a
  true value are collected, space-separated, in the `%AUTO' record.
  """
  auto = []
  for sec in sorted(conf.sections(), key = lambda sec: sec.name):

    ## Work out the record label, if this section gets a record at all.
    if sec.name.startswith('@'):
      continue
    elif sec.name.startswith('$'):
      label = sec.name
    else:
      label = 'P%s' % sec.name

    ## Note whether this section wants to be started automatically.
    try: a = sec.get('auto')
    except MissingKeyException: pass
    else:
      if a in ('y', 'yes', 't', 'true', '1', 'on'): auto.append(sec.name)

    ## Map the user name back to the section.  The `add' method wants both
    ## a key and a value: leaving out the value raises `TypeError'.
    try: u = sec.get('user')
    except MissingKeyException: pass
    else: cdb.add('U%s' % u, sec.name)

    ## Encode the section's items and write the main record.
    url = M.URLEncode(semip = True)
    for key in sorted(sec.items()):
      if not key.startswith('@'):
        url.encode(key, sec.get(key))
    cdb.add(label, url.result)

  cdb.add('%AUTO', ' '.join(auto))
  cdb.finish()
597
def main():
  """
  Main program.

  Reads the inputs named on the command line and writes the resulting
  database, either as a CDB file (if `-c' was given) or as text on standard
  output.  An `ExpectedError' is reported as a plain message, and the
  program then exits with status 2.
  """
  opts, args = parse_options()
  if opts.cdbfile:
    cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new')
  else:
    cdb = CDBFake()
  try:
    ## The option parser was given the whole argument vector, so the first
    ## positional argument is the program name: skip it.
    conf = getconf(args[1:])
    output(conf, cdb)
  except ExpectedError as e:
    M.moan(str(e))
    exit(2)
6005ef9b
MW
611
612if __name__ == '__main__':
613 main()
614
615###----- That's all, folks --------------------------------------------------