X-Git-Url: https://git.distorted.org.uk/~mdw/tripe/blobdiff_plain/b7e5aa06ec192af281f7acb38f7cf8c8d8363dc8..HEAD:/peerdb/tripe-newpeers.in diff --git a/peerdb/tripe-newpeers.in b/peerdb/tripe-newpeers.in index 37c0a341..39529531 100644 --- a/peerdb/tripe-newpeers.in +++ b/peerdb/tripe-newpeers.in @@ -32,8 +32,12 @@ import mLib as M from optparse import OptionParser import cdb as CDB from sys import stdin, stdout, exit, argv +import subprocess as SUB import re as RX import os as OS +import errno as E +import fcntl as F +import socket as S from cStringIO import StringIO ###-------------------------------------------------------------------------- @@ -48,10 +52,64 @@ class CDBFake (object): def finish(me): pass +class ExpectedError (Exception): pass + ###-------------------------------------------------------------------------- ### A bulk DNS resolver. -class BulkResolver (object): +class ResolverFailure (ExpectedError): + def __init__(me, host, msg): + me.host = host + me.msg = msg + def __str__(me): + return "failed to resolve `%s': %s" % (me.host, me.msg) + +class ResolvingHost (object): + """ + A host name which is being looked up by a bulk-resolver instance. + + Most notably, this is where the flag-handling logic lives for the + $FLAGS[HOSTNAME] syntax. + """ + + def __init__(me, name): + """Make a new resolving-host object for the host NAME.""" + me.name = name + me.addr = { 'INET': [], 'INET6': [] } + me.failure = None + + def addaddr(me, af, addr): + """ + Add the address ADDR with address family AF. + + The address family may be `INET' or `INET6'. + """ + me.addr[af].append(addr) + + def failed(me, msg): + """ + Report that resolution of this host failed, with a human-readable MSG. + """ + me.failure = msg + + def get(me, flags): + """Return a list of addresses according to the FLAGS string.""" + if me.failure is not None: raise ResolverFailure(me.name, me.failure) + aa = [] + a4 = me.addr['INET'] + a6 = me.addr['INET6'] + all, any = False, False + for ch in flags: + if ch == '*': all = True + elif ch == '4': aa += a4; any = True + elif ch == '6': aa += a6; any = True + else: raise ValueError("unknown address-resolution flag `%s'" % ch) + if not any: aa = a4 + a6 + if not aa: raise ResolverFailure(me.name, 'no matching addresses found') + if not all: aa = [aa[0]] + return aa + +class BaseBulkResolver (object): """ Resolve a number of DNS names in parallel. @@ -69,36 +127,193 @@ class BulkResolver (object): def __init__(me): """Initialize the resolver.""" - me._resolvers = {} me._namemap = {} - def prepare(me, host): - """Prime the resolver to resolve the name HOST.""" - if host not in me._resolvers: - me._resolvers[host] = M.SelResolveByName \ - (host, - lambda name, alias, addr: - me._resolved(host, addr[0]), - lambda: me._resolved(host, None)) + def prepare(me, name): + """Prime the resolver to resolve the given host NAME.""" + if name not in me._namemap: + me._namemap[name] = host = ResolvingHost(name) + try: + ailist = S.getaddrinfo(name, None, S.AF_UNSPEC, S.SOCK_DGRAM, 0, + S.AI_NUMERICHOST | S.AI_NUMERICSERV) + except S.gaierror: + me._prepare(host, name) + else: + for af, skty, proto, cname, sa in ailist: + if af == S.AF_INET: host.addaddr('INET', sa[0]) + elif af == S.AF_INET6: host.addaddr('INET6', sa[0]) + + def lookup(me, name, flags): + """Fetch the address corresponding to the host NAME.""" + return me._namemap[name].get(flags) + +class BresBulkResolver (BaseBulkResolver): + """ + A BulkResolver using mLib's `bres' background resolver. + + This is always available (and might use ADNS), but only does IPv4. + """ + + def __init__(me): + super(BresBulkResolver, me).__init__() + """Initialize the resolver.""" + me._noutstand = 0 + + def _prepare(me, host, name): + """Arrange to resolve a NAME, reporting the results to HOST.""" + host._resolv = M.SelResolveByName( + name, + lambda cname, alias, addr: me._resolved(host, cname, addr), + lambda: me._resolved(host, None, [])) + me._noutstand += 1 def run(me): """Run the background DNS resolver until it's finished.""" - while me._resolvers: - M.select() + while me._noutstand: M.select() - def lookup(me, host): + def _resolved(me, host, cname, addr): + """Callback function: remember that ADDRs are the addresses for HOST.""" + if not addr: + host.failed('(unknown failure)') + else: + if cname is not None: host.name = cname + for a in addr: host.addaddr('INET', a) + host._resolv = None + me._noutstand -= 1 + +class AdnsBulkResolver (BaseBulkResolver): + """ + A BulkResolver using ADNS, via the `adnshost' command-line tool. + + This can do simultaneous IPv4 and IPv6 lookups and is quite shiny. + """ + + def __init__(me): + """Initialize the resolver.""" + + super(AdnsBulkResolver, me).__init__() + + ## Start the external resolver process. + me._kid = SUB.Popen(['adnshost', '-afs'], + stdin = SUB.PIPE, stdout = SUB.PIPE) + + ## Set up the machinery for feeding input to the resolver. + me._in = me._kid.stdin + M.fdflags(me._in, fbic = OS.O_NONBLOCK, fxor = OS.O_NONBLOCK) + me._insel = M.SelFile(me._in.fileno(), M.SEL_WRITE, me._write) + me._inbuf, me._inoff, me._inlen = '', 0, 0 + me._idmap = {} + me._nextid = 0 + + ## Set up the machinery for collecting the resolver's output. + me._out = me._kid.stdout + M.fdflags(me._out, fbic = OS.O_NONBLOCK, fxor = OS.O_NONBLOCK) + me._outline = M.SelLineBuffer(me._out, + lineproc = me._hostline, eofproc = me._eof) + me._outline.enable() + + ## It's not finished yet. + me._done = False + + def _prepare(me, host, name): + """Arrange for the resolver to resolve the name NAME.""" + + ## Work out the next job id, and associate that with the host record. + host.id = me._nextid; me._nextid += 1 + me._namemap[name] = me._idmap[host.id] = host + + ## Feed the name to the resolver process. + me._inbuf += name + '\n' + me._inlen += len(name) + 1 + if not me._insel.activep: me._insel.enable() + while me._inoff < me._inlen: M.select() + + def _write(me): + """Write material from `_inbuf' to the resolver when it's ready.""" + + ## Try to feed some more material to the resolver. + try: n = OS.write(me._in.fileno(), me._inbuf[me._inoff:]) + except OSError, e: + if e.errno == E.EAGAIN or e.errno == E.EWOULDBLOCK: return + else: raise + + ## If we're done, then clear the buffer. + me._inoff += n + if me._inoff >= me._inlen: + me._insel.disable() + me._inbuf, me._inoff, me._inlen = '', 0, 0 + + def _eof(me): + """Notice that the resolver has finished.""" + me._outline.disable() + me._done = True + me._kid.wait() + + def run(me): """ - Fetch the address corresponding to HOST. + Tell the resolver it has all of our input now, and wait for it to finish. """ - addr = me._namemap[host] - if addr is None: - raise KeyError(host) - return addr - - def _resolved(me, host, addr): - """Callback function: remember that ADDR is the address for HOST.""" - me._namemap[host] = addr - del me._resolvers[host] + me._in.close() + while not me._done: M.select() + if me._idmap: + raise Exception('adnshost failed to process all the requests') + + def _hostline(me, line): + """Handle a host line from the resolver.""" + + ## Parse the line into fields. + (id, nrrs, stty, stocde, stmsg, owner, cname, ststr), _ = \ + M.split(line, quotep = True) + id, nrrs = int(id), int(nrrs) + + ## Find the right record. + host = me._idmap[id] + if stty != 'ok': host.failed(ststr) + + ## Stash away the canonical name of the host. + host.name = cname == '$' and owner or cname + + ## If there are no record lines to come, then remove this record from the + ## list of outstanding jobs. Otherwise, switch to the handler for record + ## lines. + if not nrrs: + del me._idmap[id] + else: + me._outline.lineproc = me._rrline + me._nrrs = nrrs + me._outhost = host + + def _rrline(me, line): + """Handle a record line from the resolver.""" + + ## Parse the line into fields. + ww, _ = M.split(line, quotep = True) + owner, type, af = ww[:3] + + ## If this is an address record, and it looks like an interesting address + ## type, then stash the address. + if type == 'A' and (af == 'INET' or af == 'INET6'): + me._outhost.addaddr(af, ww[3]) + + ## Update the parser state. If there are no more records for this job + ## then mark the job as done and switch back to expecting a host line. + me._nrrs -= 1 + if not me._nrrs: + me._outline.lineproc = me._hostline + del me._idmap[me._outhost.id] + me._outhost = None + +## Select a bulk resolver. If `adnshost' exists then we might as well use +## it. +BulkResolver = BresBulkResolver +try: + p = SUB.Popen(['adnshost', '--version'], + stdin = SUB.PIPE, stdout = SUB.PIPE, stderr = SUB.PIPE) + _out, _err = p.communicate() + st = p.wait() + if st == 0: BulkResolver = AdnsBulkResolver +except OSError: + pass ###-------------------------------------------------------------------------- ### The configuration parser. @@ -124,10 +339,11 @@ RX_CONT = RX.compile(r'''(?x) ^ \s+ ## Match a $(VAR) configuration variable reference; group 1 is the VAR. RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)') -## Match a $[HOST] name resolution reference; group 1 is the HOST. -RX_RESOLVE = RX.compile(r'(?x) \$ \[ ([^]]+) \]') +## Match a $FLAGS[HOST] name resolution reference; group 1 are the flags; +## group 2 is the HOST. +RX_RESOLVE = RX.compile(r'(?x) \$ ([46*]*) \[ ([^]]+) \]') -class ConfigSyntaxError (Exception): +class ConfigSyntaxError (ExpectedError): def __init__(me, fname, lno, msg): me.fname = fname me.lno = lno @@ -138,7 +354,7 @@ class ConfigSyntaxError (Exception): def _fmt_path(path): return ' -> '.join(["`%s'" % hop for hop in path]) -class AmbiguousOptionError (Exception): +class AmbiguousOptionError (ExpectedError): def __init__(me, key, patha, vala, pathb, valb): me.key = key me.patha, me.vala = patha, vala @@ -148,7 +364,7 @@ class AmbiguousOptionError (Exception): "path %s yields `%s' but %s yields `%s'" % \ (me.key, _fmt_path(me.patha), me.vala, _fmt_path(me.pathb), me.valb) -class InheritanceCycleError (Exception): +class InheritanceCycleError (ExpectedError): def __init__(me, key, path): me.key = key me.path = path @@ -156,13 +372,180 @@ class InheritanceCycleError (Exception): return "Found a cycle %s looking up key `%s'" % \ (_fmt_path(me.path), me.key) -class MissingKeyException (Exception): +class MissingSectionException (ExpectedError): + def __init__(me, sec): + me.sec = sec + def __str__(me): + return "Section `%s' not found" % (me.sec) + +class MissingKeyException (ExpectedError): def __init__(me, sec, key): me.sec = sec me.key = key def __str__(me): return "Key `%s' not found in section `%s'" % (me.key, me.sec) +class ConfigSection (object): + """ + A section in a configuration parser. + + This is where a lot of the nitty-gritty stuff actually happens. The + `MyConfigParser' knows a lot about the internals of this class, which saves + on building a complicated interface. + """ + + def __init__(me, name, cp): + """Initialize a new, empty section with a given NAME and parent CP.""" + + ## The cache maps item keys to entries, which consist of a pair of + ## objects. There are four possible states for a cache entry: + ## + ## * missing -- there is no entry at all with this key, so we must + ## search for it; + ## + ## * None, None -- we are actively trying to resolve this key, so if we + ## encounter this state, we have found a cycle in the inheritance + ## graph; + ## + ## * None, [] -- we know that this key isn't reachable through any of + ## our parents; + ## + ## * VALUE, PATH -- we know that the key resolves to VALUE, along the + ## PATH from us (exclusive) to the defining parent (inclusive). + me.name = name + me._itemmap = dict() + me._cache = dict() + me._cp = cp + + def _expand(me, string, resolvep): + """ + Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING. + + RESOLVEP is a boolean switch: do we bother to tax the resolver or not? + This is turned off by MyConfigParser's resolve() method while it's + collecting hostnames to be resolved. + """ + string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string) + if resolvep: + string = RX_RESOLVE.sub( + lambda m: ' '.join(me._cp._resolver.lookup(m.group(2), m.group(1))), + string) + return string + + def _parents(me): + """Yield this section's parents.""" + try: names = me._itemmap['@inherit'] + except KeyError: return + for name in names.replace(',', ' ').split(): + yield me._cp.section(name) + + def _get(me, key, path = None): + """ + Low-level option-fetching method. + + Fetch the value for the named KEY in this section, or maybe (recursively) + a section which it inherits from. + + Returns a pair VALUE, PATH. The value is not expanded; nor do we check + for the special `name' key. The caller is expected to do these things. + Returns None if no value could be found. + """ + + ## If we weren't given a path, then we'd better make one. + if path is None: path = [] + + ## Extend the path to cover us, but remember to remove us again when + ## we've finished. If we need to pass the current path back upwards, + ## then remember to take a copy. + path.append(me.name) + try: + + ## If we've been this way before on another pass through then return + ## the value we found then. If we're still thinking about it then + ## we've found a cycle. + try: v, p = me._cache[key] + except KeyError: pass + else: + if p is None: raise InheritanceCycleError(key, path[:]) + else: return v, path + p + + ## See whether the answer is ready waiting for us. + try: v = me._itemmap[key] + except KeyError: pass + else: + p = path[:] + me._cache[key] = v, [] + return v, p + + ## Initially we have no idea. + value = None + winner = [] + + ## Go through our parents and ask them what they think. + me._cache[key] = None, None + for p in me._parents(): + + ## See whether we get an answer. If not, keep on going. + v, pp = p._get(key, path) + if v is None: continue + + ## If we got an answer, check that it matches any previous ones. + if value is None: + value = v + winner = pp + elif value != v: + raise AmbiguousOptionError(key, winner, value, pp, v) + + ## That's the best we could manage. + me._cache[key] = value, winner[len(path):] + return value, winner + + finally: + ## Remove us from the path again. + path.pop() + + def get(me, key, resolvep = True): + """ + Retrieve the value of KEY from this section. + """ + + ## Special handling for the `name' key. + if key == 'name': + value = me._itemmap.get('name', me.name) + elif key == '@inherits': + try: return me._itemmap['@inherits'] + except KeyError: raise MissingKeyException(me.name, key) + else: + value, _ = me._get(key) + if value is None: + raise MissingKeyException(me.name, key) + + ## Expand the value and return it. + return me._expand(value, resolvep) + + def items(me, resolvep = True): + """ + Yield a list of item names in the section. + """ + + ## Initialize for a depth-first walk of the inheritance graph. + seen = { 'name': True } + visiting = { me.name: True } + stack = [me] + + ## Visit nodes, collecting their keys. Don't believe the values: + ## resolving inheritance is too hard to do like this. + while stack: + sec = stack.pop() + for p in sec._parents(): + if p.name not in visiting: + stack.append(p); visiting[p.name] = True + + for key in sec._itemmap.iterkeys(): seen[key] = None + + ## And we're done. + return seen.iterkeys() + class MyConfigParser (object): """ A more advanced configuration parser. @@ -176,8 +559,10 @@ class MyConfigParser (object): * It recognizes `$(VAR)' references to configuration variables during expansion and processes them correctly. - * It recognizes `$[HOST]' name-resolver requests and handles them - correctly. + * It recognizes `$FLAGS[HOST]' name-resolver requests and handles them + correctly. FLAGS consists of characters `4' (IPv4 addresses), `6' + (IPv6 addresses), and `*' (all, space-separated, rather than just the + first). * Its parsing behaviour is well-defined. @@ -188,7 +573,10 @@ class MyConfigParser (object): 2. Call resolve() to collect the hostnames which need to be resolved and actually do the name resolution. - 3. Call get(SECTION, ITEM) to collect the results, or items(SECTION) to + 3. Call sections() to get a list of the configuration sections, or + section(NAME) to find a named section. + + 4. Call get(ITEM) on a section to collect the results, or items() to iterate over them. """ @@ -219,7 +607,7 @@ class MyConfigParser (object): ## Commit a key's value when we've determined that there are no further ## continuation lines. def flush(): - if key is not None: sect[key] = val.getvalue() + if key is not None: sect._itemmap[key] = val.getvalue() ## Work through all of the input lines. for line in f: @@ -239,7 +627,7 @@ class MyConfigParser (object): flush() name = m[0].group(1) try: sect = me._sectmap[name] - except KeyError: sect = me._sectmap[name] = dict() + except KeyError: sect = me._sectmap[name] = ConfigSection(name, me) key = None elif match(RX_ASSGN): @@ -267,9 +655,14 @@ class MyConfigParser (object): ## Don't forget to commit any final value material. flush() + def section(me, name): + """Return a ConfigSection with the given NAME.""" + try: return me._sectmap[name] + except KeyError: raise MissingSectionException(name) + def sections(me): - """Yield the known section names.""" - return me._sectmap.iterkeys() + """Yield the known sections.""" + return me._sectmap.itervalues() def resolve(me): """ @@ -278,152 +671,13 @@ class MyConfigParser (object): Until you call this, attempts to fetch configuration items which need to resolve hostnames will fail! """ - for sec in me._sectmap.iterkeys(): - for key, value in me.items(sec, resolvep = False): + for sec in me.sections(): + for key in sec.items(): + value = sec.get(key, resolvep = False) for match in RX_RESOLVE.finditer(value): - me._resolver.prepare(match.group(1)) + me._resolver.prepare(match.group(2)) me._resolver.run() - def _expand(me, sec, string, resolvep): - """ - Expands $(...) and (optionally) $[...] placeholders in STRING. - - The SEC is the configuration section from which to satisfy $(...) - requests. RESOLVEP is a boolean switch: do we bother to tax the resolver - or not? This is turned off by the resolve() method while it's collecting - hostnames to be resolved. - """ - string = RX_REF.sub \ - (lambda m: me.get(sec, m.group(1), resolvep), string) - if resolvep: - string = RX_RESOLVE.sub(lambda m: me._resolver.lookup(m.group(1)), - string) - return string - - def has_option(me, sec, key): - """ - Decide whether section SEC has a configuration key KEY. - - This version of the method properly handles the @inherit key. - """ - return key == 'name' or me._get(sec, key)[0] is not None - - def _get(me, sec, key, map = None, path = None): - """ - Low-level option-fetching method. - - Fetch the value for the named KEY from section SEC, or maybe - (recursively) a section which SEC inherits from. - - Returns a pair VALUE, PATH. The value is not expanded; nor do we check - for the special `name' key. The caller is expected to do these things. - Returns None if no value could be found. - """ - - ## If we weren't given a memoization map or path, then we'd better make - ## one. - if map is None: map = {} - if path is None: path = [] - - ## Extend the path to cover the lookup section, but remember to remove us - ## again when we've finished. If we need to pass the current path back - ## upwards, then remember to take a copy. - path.append(sec) - try: - - ## If we've been this way before on another pass through then return - ## the value we found then. If we're still thinking about it then - ## we've found a cycle. - try: threadp, value = map[sec] - except KeyError: pass - else: - if threadp: raise InheritanceCycleError(key, path[:]) - - ## See whether the answer is ready waiting for us. - try: v = me._sectmap[sec][key] - except KeyError: pass - else: return v, path[:] - - ## No, apparently, not. Find out our list of parents. - try: - parents = me._sectmap[sec]['@inherit'].replace(',', ' ').split() - except KeyError: - parents = [] - - ## Initially we have no idea. - value = None - winner = None - - ## Go through our parents and ask them what they think. - map[sec] = True, None - for p in parents: - - ## See whether we get an answer. If not, keep on going. - v, pp = me._get(p, key, map, path) - if v is None: continue - - ## If we got an answer, check that it matches any previous ones. - if value is None: - value = v - winner = pp - elif value != v: - raise AmbiguousOptionError(key, winner, value, pp, v) - - ## That's the best we could manage. - map[sec] = False, value - return value, winner - - finally: - ## Remove us from the path again. - path.pop() - - def get(me, sec, key, resolvep = True): - """ - Retrieve the value of KEY from section SEC. - """ - - ## Special handling for the `name' key. - if key == 'name': - value = me._sectmap[sec].get('name', sec) - else: - value, _ = me._get(sec, key) - if value is None: - raise MissingKeyException(sec, key) - - ## Expand the value and return it. - return me._expand(sec, value, resolvep) - - def items(me, sec, resolvep = True): - """ - Return a list of (NAME, VALUE) items in section SEC. - - This extends the default method by handling the inheritance chain. - """ - - ## Initialize for a depth-first walk of the inheritance graph. - d = {} - visited = {} - basesec = sec - stack = [sec] - - ## Visit nodes, collecting their keys. Don't believe the values: - ## resolving inheritance is too hard to do like this. - while stack: - sec = stack.pop() - if sec in visited: continue - visited[sec] = True - - for key, value in me._sectmap[sec].iteritems(): - if key == '@inherit': stack += value.replace(',', ' ').split() - else: d[key] = None - - ## Now collect the values for the known keys, one by one. - items = [] - for key in d: items.append((key, me.get(basesec, key, resolvep))) - - ## And we're done. - return items - ###-------------------------------------------------------------------------- ### Command-line handling. @@ -490,22 +744,24 @@ def output(conf, cdb): This is where the special `user' and `auto' database entries get set. """ auto = [] - for sec in sorted(conf.sections()): - if sec.startswith('@'): + for sec in sorted(conf.sections(), key = lambda sec: sec.name): + if sec.name.startswith('@'): continue - elif sec.startswith('$'): - label = sec + elif sec.name.startswith('$'): + label = sec.name else: - label = 'P%s' % sec - if conf.has_option(sec, 'auto') and \ - conf.get(sec, 'auto') in ('y', 'yes', 't', 'true', '1', 'on'): - auto.append(sec) - if conf.has_option(sec, 'user'): - cdb.add('U%s' % conf.get(sec, 'user'), sec) - url = M.URLEncode(laxp = True, semip = True) - for key, value in sorted(conf.items(sec), key = lambda (k, v): k): + label = 'P%s' % sec.name + try: a = sec.get('auto') + except MissingKeyException: pass + else: + if a in ('y', 'yes', 't', 'true', '1', 'on'): auto.append(sec.name) + try: u = sec.get('user') + except MissingKeyException: pass + else: cdb.add('U%s' % u, sec.name) + url = M.URLEncode(semip = True) + for key in sorted(sec.items()): if not key.startswith('@'): - url.encode(key, ' '.join(M.split(value)[0])) + url.encode(key, sec.get(key)) cdb.add(label, url.result) cdb.add('%AUTO', ' '.join(auto)) cdb.finish() @@ -517,8 +773,12 @@ def main(): cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new') else: cdb = CDBFake() - conf = getconf(args[1:]) - output(conf, cdb) + try: + conf = getconf(args[1:]) + output(conf, cdb) + except ExpectedError, e: + M.moan(str(e)) + exit(2) if __name__ == '__main__': main()