peerdb/tripe-newpeers.in: Add support for v4 and v6 address literals.
[tripe] / peerdb / tripe-newpeers.in
CommitLineData
6005ef9b
MW
1#! @PYTHON@
2### -*-python-*-
3###
4### Build a CDB file from configuration file
5###
6### (c) 2007 Straylight/Edgeware
7###
8
9###----- Licensing notice ---------------------------------------------------
10###
11### This file is part of Trivial IP Encryption (TrIPE).
12###
11ad66c2
MW
13### TrIPE is free software: you can redistribute it and/or modify it under
14### the terms of the GNU General Public License as published by the Free
15### Software Foundation; either version 3 of the License, or (at your
16### option) any later version.
6005ef9b 17###
11ad66c2
MW
18### TrIPE is distributed in the hope that it will be useful, but WITHOUT
19### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20### FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21### for more details.
6005ef9b
MW
22###
23### You should have received a copy of the GNU General Public License
11ad66c2 24### along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
6005ef9b
MW
25
26VERSION = '@VERSION@'
27
28###--------------------------------------------------------------------------
29### External dependencies.
30
6005ef9b
MW
31import mLib as M
32from optparse import OptionParser
33import cdb as CDB
34from sys import stdin, stdout, exit, argv
35import re as RX
36import os as OS
97567475 37import socket as S
b7e5aa06 38from cStringIO import StringIO
6005ef9b
MW
39
40###--------------------------------------------------------------------------
41### Utilities.
42
class CDBFake (object):
    """
    Stand-in for a CDB writer which emits plain text records instead.

    Each record is written to the output stream as a `KEY:VALUE' line, the
    form the original author describes as suitable for cdb-map.
    """

    def __init__(me, file = stdout):
        """Remember the output stream FILE (standard output by default)."""
        me.file = file

    def add(me, key, value):
        """Write a single `KEY:VALUE' record, terminated by a newline."""
        record = '%s:%s\n' % (key, value)
        me.file.write(record)

    def finish(me):
        """Nothing to commit: every record was written as it was added."""
        return None
51
1c4623dd
MW
class ExpectedError (Exception):
    """
    Base class for the failures which main() catches and reports as a plain
    message before exiting with status 2.
    """
53
6005ef9b
MW
54###--------------------------------------------------------------------------
55### A bulk DNS resolver.
56
class ResolverFailure (ExpectedError):
    """Raised when no usable address could be found for a host name."""

    def __init__(me, host, msg):
        """Record the HOST which failed and a human-readable MSG."""
        me.host, me.msg = host, msg

    def __str__(me):
        """Format the failure for display to the user."""
        details = (me.host, me.msg)
        return "failed to resolve `%s': %s" % details
63
660564a1
MW
class ResolvingHost (object):
    """
    A host name which is being looked up by a bulk-resolver instance.

    Addresses are collected per address family as they are reported; the
    flag-handling logic for the $FLAGS[HOSTNAME] syntax lives in `get'.
    """

    def __init__(me, name):
        """Make a new resolving-host object for the host NAME."""
        me.name = name
        me.failure = None
        me.addr = { 'INET': [], 'INET6': [] }

    def addaddr(me, af, addr):
        """
        Record ADDR as an address of this host in address family AF.

        AF must be one of the strings `INET' or `INET6'; addresses are kept
        in the order in which they were added.
        """
        me.addr[af].append(addr)

    def failed(me, msg):
        """
        Note that resolving this host failed; MSG is a human-readable reason.
        """
        me.failure = msg

    def get(me, flags):
        """
        Return a list of addresses selected by the FLAGS string.

        Each `4' or `6' in FLAGS appends the addresses of that family, in the
        order the flags appear; if neither is given, IPv4 addresses followed
        by IPv6 addresses are used.  Unless `*' is present, only the first
        selected address is returned.

        Raises ResolverFailure if resolution failed or nothing matched, and
        ValueError if FLAGS contains an unrecognized character.
        """
        if me.failure is not None:
            raise ResolverFailure(me.name, me.failure)

        v4, v6 = me.addr['INET'], me.addr['INET6']
        want_all = False
        family_given = False
        picked = []
        for ch in flags:
            if ch == '*':
                want_all = True
            elif ch == '4':
                picked.extend(v4)
                family_given = True
            elif ch == '6':
                picked.extend(v6)
                family_given = True
            else:
                raise ValueError("unknown address-resolution flag `%s'" % ch)

        ## No explicit family: offer everything we know about.
        if not family_given: picked = v4 + v6

        if not picked:
            raise ResolverFailure(me.name, 'no matching addresses found')
        return picked if want_all else picked[:1]
660564a1 108
6005ef9b
MW
class BulkResolver (object):
    """
    Resolve a number of DNS names in parallel.

    The BulkResolver resolves a number of hostnames in parallel.  Using it
    works in three phases:

      1. You call prepare(HOSTNAME) a number of times, to feed in the
         hostnames you're interested in.

      2. You call run() to actually drive the resolver.

      3. You call lookup(HOSTNAME, FLAGS) to get the addresses you wanted.
         This raises ResolverFailure if the HOSTNAME couldn't be resolved
         (or nothing matched FLAGS), and KeyError if the HOSTNAME was never
         passed to prepare().
    """

    def __init__(me):
        """Initialize the resolver."""
        me._noutstand = 0               # lookups started but not yet finished
        me._namemap = {}                # maps requested names to hosts

    def _prepare(me, host, name):
        """Arrange to resolve a NAME, reporting the results to HOST."""
        def found(cname, alias, addr): me._resolved(host, cname, addr)
        def notfound(): me._resolved(host, None, [])
        host._resolv = M.SelResolveByName(name, found, notfound)
        me._noutstand += 1

    def prepare(me, name):
        """
        Prime the resolver to resolve the given host NAME.

        A NAME which has been seen before is ignored.  A NAME which
        getaddrinfo accepts as a numeric address literal is recorded
        immediately; anything else is queued for background resolution.
        """
        if name in me._namemap: return
        host = me._namemap[name] = ResolvingHost(name)

        ## Try the name as an address literal first: AI_NUMERICHOST makes
        ## getaddrinfo fail rather than consult the resolver.
        try:
            ailist = S.getaddrinfo(name, None, S.AF_UNSPEC, S.SOCK_DGRAM, 0,
                                   S.AI_NUMERICHOST | S.AI_NUMERICSERV)
        except S.gaierror:
            me._prepare(host, name)
            return

        for af, skty, proto, cname, sa in ailist:
            if af == S.AF_INET: host.addaddr('INET', sa[0])
            elif af == S.AF_INET6: host.addaddr('INET6', sa[0])

    def run(me):
        """Run the background DNS resolver until it's finished."""
        while me._noutstand:
            M.select()

    def lookup(me, name, flags):
        """
        Fetch the addresses for the host NAME, selected according to FLAGS.

        The result is whatever the host's get(FLAGS) method returns.
        """
        host = me._namemap[name]
        return host.get(flags)

    def _resolved(me, host, cname, addr):
        """
        Callback function: remember that ADDRs are the addresses for HOST.

        An empty ADDR list marks the host as failed.  Otherwise the host is
        renamed to CNAME if one was supplied, and the addresses are recorded
        as `INET' addresses.
        """
        if addr:
            if cname is not None: host.name = cname
            for a in addr:
                host.addaddr('INET', a)
        else:
            host.failed('(unknown failure)')

        ## This lookup is complete: drop the resolver object and let run()
        ## know there's one fewer to wait for.
        host._resolv = None
        me._noutstand -= 1
6005ef9b
MW
169
170###--------------------------------------------------------------------------
171### The configuration parser.
172
b7e5aa06
MW
## All of the patterns below use the inline `(?x)' (verbose) flag, so
## whitespace inside the pattern text is insignificant.

## Match a comment or empty line: optional leading whitespace followed by
## end-of-line or a `;' or `#' character.
RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])')

## Match a section group header `[NAME]'; group 1 is the NAME.
RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $')

## Match an assignment line `KEY = VALUE' or `KEY: VALUE'.  Group 1 is the
## KEY, which must start in the first column and has no trailing whitespace;
## group 2 is the VALUE with surrounding whitespace removed (possibly empty).
RX_ASSGN = RX.compile(r'''(?x) ^
        ([^\s:=] (?: [^:=]* [^\s:=])?)
        \s* [:=] \s*
        (| \S | \S.*\S)
        \s* $''')

## Match a continuation line, i.e., one which begins with whitespace; group 1
## is the text with surrounding whitespace removed (possibly empty).
RX_CONT = RX.compile(r'''(?x) ^ \s+
        (| \S | \S.*\S)
        \s* $''')

## Match a $(VAR) configuration variable reference; group 1 is the VAR.
RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)')

## Match a $FLAGS[HOST] name resolution reference; group 1 are the flags
## (any mixture of `4', `6' and `*', possibly none); group 2 is the HOST.
RX_RESOLVE = RX.compile(r'(?x) \$ ([46*]*) \[ ([^]]+) \]')
6005ef9b 197
class ConfigSyntaxError (ExpectedError):
    """Raised when a configuration file contains a line we can't accept."""

    def __init__(me, fname, lno, msg):
        """Record the file name FNAME, line number LNO and complaint MSG."""
        me.fname, me.lno, me.msg = fname, lno, msg

    def __str__(me):
        """Format the error as `FILE:LINE: MESSAGE'."""
        where = (me.fname, me.lno, me.msg)
        return '%s:%d: %s' % where
205
bd3db76c
MW
def _fmt_path(path):
    """Format the sequence of names PATH as "`a' -> `b' -> ..."."""
    quoted = ("`%s'" % hop for hop in path)
    return ' -> '.join(quoted)
208
class AmbiguousOptionError (ExpectedError):
    """Raised when two inheritance paths give different values for a key."""

    def __init__(me, key, patha, vala, pathb, valb):
        """
        Record the KEY, and the two conflicting answers: VALA found along
        PATHA, and VALB found along PATHB.
        """
        me.key = key
        me.patha = patha; me.vala = vala
        me.pathb = pathb; me.valb = valb

    def __str__(me):
        """Describe both conflicting paths and the values they produced."""
        template = "Ambiguous answer resolving key `%s': " \
                   "path %s yields `%s' but %s yields `%s'"
        return template % (me.key,
                           _fmt_path(me.patha), me.vala,
                           _fmt_path(me.pathb), me.valb)
218
class InheritanceCycleError (ExpectedError):
    """Raised when following section inheritance leads back on itself."""

    def __init__(me, key, path):
        """Record the KEY being looked up and the cyclic PATH of sections."""
        me.key, me.path = key, path

    def __str__(me):
        """Describe the cycle and the key which uncovered it."""
        cycle = _fmt_path(me.path)
        return "Found a cycle %s looking up key `%s'" % (cycle, me.key)
226
class MissingSectionException (ExpectedError):
    """Raised when a section is requested by a name nobody defined."""

    def __init__(me, sec):
        """Record the name SEC of the missing section."""
        me.sec = sec

    def __str__(me):
        """Name the section which couldn't be found."""
        return "Section `%s' not found" % me.sec
232
class MissingKeyException (ExpectedError):
    """Raised when a key can't be found in a section or its parents."""

    def __init__(me, sec, key):
        """Record the section name SEC and the missing KEY."""
        me.sec, me.key = sec, key

    def __str__(me):
        """Name the missing key and the section which was searched."""
        what = (me.key, me.sec)
        return "Key `%s' not found in section `%s'" % what
239
e3ec3a3a
MW
class ConfigSection (object):
    """
    A section in a configuration parser.

    This is where a lot of the nitty-gritty stuff actually happens.  The
    `MyConfigParser' knows a lot about the internals of this class, which
    saves on building a complicated interface.
    """

    ## The key which `_parents' reads to find a section's parents.
    _INHERIT_KEY = '@inherit'

    ## Keys which `get' returns verbatim from this section only, without
    ## inheritance or expansion.  `@inherits' is retained because `get' has
    ## always special-cased that spelling, even though `_parents' reads
    ## `@inherit'.
    _RAW_KEYS = ('@inherit', '@inherits')

    def __init__(me, name, cp):
        """Initialize a new, empty section with a given NAME and parent CP."""

        ## The cache maps item keys to entries, which consist of a pair of
        ## objects.  There are four possible states for a cache entry:
        ##
        ##   * missing -- there is no entry at all with this key, so we must
        ##     search for it;
        ##
        ##   * None, None -- we are actively trying to resolve this key, so
        ##     if we encounter this state, we have found a cycle in the
        ##     inheritance graph;
        ##
        ##   * None, [] -- we know that this key isn't reachable through any
        ##     of our parents;
        ##
        ##   * VALUE, PATH -- we know that the key resolves to VALUE, along
        ##     the PATH from us (exclusive) to the defining parent
        ##     (inclusive).
        me.name = name
        me._itemmap = dict()
        me._cache = dict()
        me._cp = cp

    def _expand(me, string, resolvep):
        """
        Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING.

        RESOLVEP is a boolean switch: do we bother to tax the resolver or
        not?  This is turned off by MyConfigParser's resolve() method while
        it's collecting hostnames to be resolved.
        """
        string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string)
        if resolvep:
            string = RX_RESOLVE.sub(
                lambda m: ' '.join(me._cp._resolver.lookup(m.group(2),
                                                           m.group(1))),
                string)
        return string

    def _parents(me):
        """
        Yield this section's parents.

        The parents are named, separated by commas and/or whitespace, in the
        section's own `@inherit' item; a section without one has no parents.
        Raises whatever the parser's section() method raises for an unknown
        name.
        """
        try: names = me._itemmap[me._INHERIT_KEY]
        except KeyError: return
        for name in names.replace(',', ' ').split():
            yield me._cp.section(name)

    def _get(me, key, path = None):
        """
        Low-level option-fetching method.

        Fetch the value for the named KEY in this section, or maybe
        (recursively) a section which it inherits from.  PATH is the list of
        section names visited so far; callers outside this method should
        leave it unset.

        Returns a pair VALUE, PATH.  The value is not expanded; nor do we
        check for the special `name' key.  The caller is expected to do these
        things.  If no value could be found, the pair is None, [].

        Raises InheritanceCycleError if the inheritance graph loops back on
        itself, and AmbiguousOptionError if two parents disagree.
        """

        ## If we weren't given a path, then we'd better make one.
        if path is None: path = []

        ## Extend the path to cover us, but remember to remove us again when
        ## we've finished.  If we need to pass the current path back upwards,
        ## then remember to take a copy.
        path.append(me.name)
        try:

            ## If we've been this way before on another pass through then
            ## return the value we found then.  If we're still thinking about
            ## it then we've found a cycle.
            try: v, p = me._cache[key]
            except KeyError: pass
            else:
                if p is None: raise InheritanceCycleError(key, path[:])
                else: return v, path + p

            ## See whether the answer is ready waiting for us.
            try: v = me._itemmap[key]
            except KeyError: pass
            else:
                me._cache[key] = v, []
                return v, path[:]

            ## Initially we have no idea.
            value = None
            winner = []

            ## Go through our parents and ask them what they think.  While we
            ## do, mark the key as being in progress so that cycles show up.
            me._cache[key] = None, None
            try:
                for parent in me._parents():

                    ## See whether we get an answer.  If not, keep on going.
                    v, pp = parent._get(key, path)
                    if v is None: continue

                    ## If we got an answer, check that it matches any
                    ## previous ones.
                    if value is None:
                        value, winner = v, pp
                    elif value != v:
                        raise AmbiguousOptionError(key, winner, value, pp, v)
            except BaseException:
                ## The search was abandoned, so the in-progress marker is no
                ## longer true.  Leaving it behind would make a later lookup
                ## report a cycle which isn't there.
                me._cache.pop(key, None)
                raise

            ## That's the best we could manage.
            me._cache[key] = value, winner[len(path):]
            return value, winner

        finally:
            ## Remove us from the path again.
            path.pop()

    def get(me, key, resolvep = True):
        """
        Retrieve the value of KEY from this section.

        The `name' key defaults to the section's own name.  The inheritance
        keys (`@inherit', and the alternative spelling `@inherits') are
        returned exactly as written in this section: they are neither
        inherited nor expanded.  Any other key is looked up through the
        inheritance graph and then expanded; RESOLVEP says whether
        $FLAGS[HOST] references are resolved as part of that.

        Raises MissingKeyException if the key can't be found.
        """

        ## Special handling for the `name' key.
        if key == 'name':
            value = me._itemmap.get('name', me.name)
        elif key in me._RAW_KEYS:
            try: return me._itemmap[key]
            except KeyError: raise MissingKeyException(me.name, key)
        else:
            value, _ = me._get(key)
            if value is None:
                raise MissingKeyException(me.name, key)

        ## Expand the value and return it.
        return me._expand(value, resolvep)

    def items(me, resolvep = True):
        """
        Return an iterator over the item names in the section.

        The names are those defined in this section or in any section it
        inherits from, directly or indirectly, together with `name'.  They
        come in no particular order.  RESOLVEP is accepted but not used.
        """

        ## Initialize for a depth-first walk of the inheritance graph.
        seen = set(['name'])
        visiting = set([me.name])
        stack = [me]

        ## Visit nodes, collecting their keys.  Don't believe the values:
        ## resolving inheritance is too hard to do like this.
        while stack:
            sec = stack.pop()
            for p in sec._parents():
                if p.name not in visiting:
                    visiting.add(p.name)
                    stack.append(p)
            seen.update(sec._itemmap)

        ## And we're done.
        return iter(seen)
e3ec3a3a 400
class MyConfigParser (object):
    """
    A more advanced configuration parser.

    This has four major enhancements over the standard ConfigParser which
    are relevant to us.

      * It recognizes `@inherit' keys and follows them when expanding a
        value.

      * It recognizes `$(VAR)' references to configuration variables during
        expansion and processes them correctly.

      * It recognizes `$FLAGS[HOST]' name-resolver requests and handles them
        correctly.  FLAGS consists of characters `4' (IPv4 addresses), `6'
        (IPv6 addresses), and `*' (all, space-separated, rather than just
        the first).

      * Its parsing behaviour is well-defined.

    Use:

      1. Call parse(FILE) on each open file to slurp in the configuration
         data.

      2. Call resolve() to collect the hostnames which need to be resolved
         and actually do the name resolution.

      3. Call sections() to get a list of the configuration sections, or
         section(NAME) to find a named section.

      4. Call get(ITEM) on a section to collect the results, or items() to
         iterate over them.
    """

    def __init__(me):
        """
        Initialize a new, empty configuration parser.
        """
        me._sectmap = dict()
        me._resolver = BulkResolver()

    def parse(me, f):
        """
        Parse configuration from a file F.

        F is iterated over line by line.  Sections which have already been
        seen are extended rather than replaced, and a repeated key replaces
        the earlier value.  Raises ConfigSyntaxError for an assignment
        outside any section, a continuation line with nothing to continue,
        or a line which can't be understood at all.
        """

        ## The file's name is only wanted for error messages, so cope with
        ## file-like objects which don't have one.
        fname = getattr(f, 'name', '<input>')

        ## Initial parser state: the current section, the key being
        ## assembled, and the lines of its value collected so far.
        sect = None
        key = None
        pieces = None

        ## Commit a key's value when we've determined that there are no
        ## further continuation lines.
        def flush():
            if key is not None: sect._itemmap[key] = '\n'.join(pieces)

        ## Work through all of the input lines.
        for lno, line in enumerate(f, 1):

            ## A comment or a blank line.  Nothing doing.  (This means that
            ## we leave out blank lines which look like they might be
            ## continuation lines.)
            if RX_COMMENT.match(line): continue

            ## A section header.  Flush out any previous value and set up the
            ## new group.
            m = RX_GRPHDR.match(line)
            if m:
                flush()
                name = m.group(1)
                try: sect = me._sectmap[name]
                except KeyError:
                    sect = me._sectmap[name] = ConfigSection(name, me)
                key = None
                continue

            ## A new assignment.  Flush out the old one, and set up to store
            ## this one.
            m = RX_ASSGN.match(line)
            if m:
                if sect is None:
                    raise ConfigSyntaxError(fname, lno,
                                            'no active section to update')
                flush()
                key = m.group(1)
                pieces = [m.group(2)]
                continue

            ## A continuation line.  Accumulate the value.
            m = RX_CONT.match(line)
            if m:
                if key is None:
                    raise ConfigSyntaxError(fname, lno,
                                            'no config value to continue')
                pieces.append(m.group(1))
                continue

            ## Something else.
            raise ConfigSyntaxError(fname, lno, 'incomprehensible line')

        ## Don't forget to commit any final value material.
        flush()

    def section(me, name):
        """
        Return a ConfigSection with the given NAME.

        Raises MissingSectionException if there is no such section.
        """
        try: return me._sectmap[name]
        except KeyError: raise MissingSectionException(name)

    def sections(me):
        """Return an iterator over the known sections."""
        return iter(me._sectmap.values())

    def resolve(me):
        """
        Works out all of the hostnames which need resolving and resolves
        them.

        Until you call this, attempts to fetch configuration items which need
        to resolve hostnames will fail!
        """
        for sec in me.sections():
            for key in sec.items():
                ## Expand variable references only: the resolver hasn't got
                ## any answers yet.
                value = sec.get(key, resolvep = False)
                for match in RX_RESOLVE.finditer(value):
                    me._resolver.prepare(match.group(2))
        me._resolver.run()
532
6005ef9b
MW
533###--------------------------------------------------------------------------
534### Command-line handling.
535
def inputiter(things):
    """
    Iterate over command-line arguments, returning corresponding open files.

    If none were given, or one is `-', assume standard input; if one is a
    directory, scan it for files other than backups (names ending in `~' or
    `#'), in sorted order; otherwise return the opened files.

    The files are yielded open; nothing here closes them.
    """

    if not things:
        if OS.isatty(stdin.fileno()):
            M.die('no input given, and stdin is a terminal')
        yield stdin
        return

    for thing in things:
        if thing == '-':
            yield stdin
        elif OS.path.isdir(thing):
            ## The order in which listdir reports entries is arbitrary; sort
            ## them so that the files are always read in the same order.
            for item in sorted(OS.listdir(thing)):
                if item.endswith(('~', '#')): continue
                name = OS.path.join(thing, item)
                if OS.path.isfile(name): yield open(name)
        else:
            yield open(thing)
563
def parse_options(argv = argv):
    """
    Parse command-line options, returning a pair (OPTS, ARGS).

    ARGV is the complete argument vector, program name included; the name is
    handed to M.ego and is also left as the first element of ARGS.
    """
    M.ego(argv[0])
    parser = OptionParser(
        usage = '%prog [-c CDB] INPUT...',
        version = '%%prog (tripe, version %s)' % VERSION)
    parser.add_option(
        '-c', '--cdb',
        metavar = 'CDB', dest = 'cdbfile', default = None,
        help = 'Compile output into a CDB file.')
    return parser.parse_args(argv)
576
577###--------------------------------------------------------------------------
578### Main code.
579
def getconf(args):
    """
    Read the configuration files named by ARGS and return the parser.

    Every input is parsed into a single MyConfigParser, in the order that
    inputiter produces them, and then the hostnames mentioned are resolved
    before the parser is handed back.
    """
    parser = MyConfigParser()
    for source in inputiter(args):
        parser.parse(source)
    parser.resolve()
    return parser
591
def output(conf, cdb):
    """
    Output the configuration information CONF to the database CDB.

    Sections are written in order of name.  Sections whose names begin with
    `@' are skipped; those beginning with `$' are stored under their own
    names; all others are stored under `P' followed by the name.  Each
    record holds the section's items, other than those whose keys begin with
    `@', encoded using M.URLEncode.

    This is where the special `user' and `auto' database entries get set: a
    section with a `user' item also gets a `U' record for that user, and the
    names of sections with a true `auto' item are collected in `%AUTO'.
    """
    auto = []
    for sec in sorted(conf.sections(), key = lambda sec: sec.name):
        if sec.name.startswith('@'):
            continue
        elif sec.name.startswith('$'):
            label = sec.name
        else:
            label = 'P%s' % sec.name

        try: a = sec.get('auto')
        except MissingKeyException: pass
        else:
            if a in ('y', 'yes', 't', 'true', '1', 'on'):
                auto.append(sec.name)

        ## add() takes a key and a value.  The `U' record maps the user to
        ## this section's name.
        ## NOTE(review): the value was missing from the original call; the
        ## section name is assumed to be what readers of the database expect
        ## -- confirm.
        try: u = sec.get('user')
        except MissingKeyException: pass
        else: cdb.add('U%s' % u, sec.name)

        url = M.URLEncode(semip = True)
        for key in sorted(sec.items()):
            if not key.startswith('@'):
                url.encode(key, sec.get(key))
        cdb.add(label, url.result)

    cdb.add('%AUTO', ' '.join(auto))
    cdb.finish()
620
def main():
    """
    Main program.

    Parse the command line, read and resolve the configuration, and write
    the result either to a CDB file (with `-c') or as text on standard
    output.  Anticipated failures are reported through M.moan, and the
    program exits with status 2.
    """
    opts, args = parse_options()
    if opts.cdbfile:
        ## The second argument names a temporary file alongside the target.
        cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new')
    else:
        cdb = CDBFake()
    try:
        ## The first argument is the program name: see parse_options.
        conf = getconf(args[1:])
        output(conf, cdb)
    except ExpectedError as e:
        M.moan(str(e))
        exit(2)
6005ef9b
MW
634
635if __name__ == '__main__':
636 main()
637
638###----- That's all, folks --------------------------------------------------