## `errno' is needed by `clear_entry' to tell a vanished file from a real
## failure.
import errno as E

###--------------------------------------------------------------------------
### Database clearing from diff files.

## Unified-diff hunk header.  The two groups capture the old and new line
## counts.  A count is omitted by diff(1) when it is exactly 1, so either
## group may be `None'; some tools also append a section heading after the
## closing `@@', which is tolerated and ignored.
R_HUNK = RX.compile(r'^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@(?:\s.*)?$')

def clear_entry(db, lno, line):
  """
  Forget the cached hash of the file named by one listing LINE.

  LINE is a changed line taken from a diff hunk, with the leading `+' or `-'
  already removed.  Two layouts are accepted:

    * `[TYPE] INO MODE UIDGID MTIME SIZE NAME' for non-regular files; and
    * `HASH INO MODE UIDGID MTIME SIZE NAME' for regular files.

  For a TYPE of `symbolic-link', NAME has the form `NAME -> TARGET'.  Only
  NAME is actually used: the file is looked up with lstat(2), and its
  *current* inode number is passed to DB.forget, so DB must provide a
  `forget(ino)' method.

  LNO is the diff line number, used only in diagnostics.

  Return true on success.  A file which no longer exists is reported but
  still counts as success (there is nothing left to forget); parse errors
  and other lstat(2) failures are reported and return false.
  """

  ## Split off the leading type or hash field.
  if line.startswith('['):
    pos = line.find(']')
    if pos < 0:
      moan("failed to parse file entry (type field; line %d)" % lno)
      return False
    ty = line[1:pos].strip()
    rest = line[pos + 1:]
  else:
    ff = line.split(None, 1)
    if len(ff) != 2:
      moan("failed to parse file entry (field split; line %d)" % lno)
      return False
    ty = 'regular-file'
    rest = ff[1]

  ## The remaining fields are inode, mode, owner, mtime and size, followed by
  ## the name, which is the only one wanted here.
  ff = rest.split(None, 5)
  if len(ff) != 6:
    moan("failed to parse file entry (field split; line %d)" % lno)
    return False
  name = ff[5]

  ## Symbolic links carry their target after the name; strip it off.
  if ty == 'symbolic-link':
    nn = name.split(' -> ', 1)
    if len(nn) != 2:
      moan("failed to parse file entry (name split; line %d)" % lno)
      return False
    name = nn[0]

  ## Undo the backslash escaping of the name (Python 2 `string_escape'
  ## codec).  A malformed escape is a parse error rather than a crash.
  try:
    name = name.decode('string_escape')
  except ValueError:
    moan("failed to parse file entry (name escape; line %d)" % lno)
    return False

  ## Find the file's current inode and drop its cache entry.
  try:
    st = OS.lstat(name)
  except OSError as e:
    moan("failed to stat `%s': %s" % (name, e.strerror))
    return e.errno == E.ENOENT
  print("Clear cache entry for `%s'" % name)
  db.forget(st.st_ino)
  return True

def clear_cache(db, infile = None):
  """
  Read a unified diff and forget the cache entry of every changed file.

  The diff is read a line at a time from INFILE, which may be any iterable
  of lines and defaults to standard input.  Lines outside hunks are ignored;
  within a hunk, every `+' and `-' line is handed to `clear_entry' together
  with DB.

  Return true if the whole diff was processed without any problem, or false
  if anything was reported via `moan'.  Processing continues after an error
  so that as many problems as possible are reported in one run.
  """

  if infile is None: infile = stdin

  ## Work through the input diff file one line at a time.
  diffstate = 'gap'
  oldlines = newlines = hdrlno = 0
  good = True
  for lno, line in enumerate(infile, 1):
    if line.endswith('\n'): line = line[:-1]

    ## We're in a gap between hunks.  Find a hunk header and extract the line
    ## counts; an omitted count means exactly one line.
    if diffstate == 'gap':
      m = R_HUNK.match(line)
      if m:
        oldlines, newlines = [1 if g is None else int(g) for g in m.groups()]
        hdrlno = lno
        if oldlines or newlines: diffstate = 'hunk'
      continue

    ## We're in a hunk.  Keep track of whether we've reached the end, and
    ## discard entries from the cache for mismatching lines.
    ty = line[:1]

    ## A `\ No newline at end of file' marker describes the line before it
    ## and doesn't count towards either length.
    if ty == '\\': continue

    ## A completely empty line can only be a context line whose leading
    ## space has been stripped.  Complain, but count it as context so that
    ## the rest of the hunk stays in step.
    if ty == '':
      moan("empty line in diff hunk (line %d)" % lno)
      good = False
      ty = ' '

    if ty == ' ':
      oldlines -= 1; newlines -= 1
    elif ty == '+':
      newlines -= 1
      if not clear_entry(db, lno, line[1:]): good = False
    elif ty == '-':
      oldlines -= 1
      if not clear_entry(db, lno, line[1:]): good = False
    else:
      moan("incomprehensible line in diff hunk (line %d)" % lno)
      good = False

    ## If a count has gone negative then the header lied.  Report it once
    ## and resynchronize at the next hunk header rather than complaining
    ## about every remaining line.
    if oldlines < 0 or newlines < 0:
      moan("inconsistent lengths in diff hunk header (line %d)" % hdrlno)
      good = False
      diffstate = 'gap'
    elif oldlines == newlines == 0:
      diffstate = 'gap'

  if diffstate == 'hunk':
    moan("truncated diff hunk (started at line %d)" % hdrlno)
    good = False

  return good

###--------------------------------------------------------------------------
### Main program.
FMTMAP = { @@ -457,7 +566,7 @@ FMTMAP = { 'find0': lambda f: enum_find0(stdin, f) } op = OP.OptionParser( - usage = '%prog [-a] [-c CACHE] [-f FORMAT] [-H HASH] [FILE ...]', + usage = '%prog [-au] [-c CACHE] [-f FORMAT] [-H HASH] [FILE ...]', version = '%%prog, version %s' % VERSION, description = '''\ Print a digest of a filesystem (or a collection of specified files) to @@ -475,24 +584,36 @@ for short, long, props in [ ('-f', '--files', { 'dest': 'files', 'metavar': 'FORMAT', 'type': 'choice', 'choices': FMTMAP.keys(), 'help': 'read files to report in the given FORMAT' }), + ('-u', '--udiff', { 'action': 'store_true', 'dest': 'udiff', + 'help': 'read diff from stdin, clear cache entries' }), ('-H', '--hash', { 'dest': 'hash', 'metavar': 'HASH', ##'type': 'choice', 'choices': H.algorithms, 'help': 'use HASH as the hash function' })]: op.add_option(short, long, **props) opts, args = op.parse_args(argv) -if not opts.files and len(args) <= 1: - die("no filename sources: nothing to do") -db = HashCache(opts.cache, opts.hash) -if opts.all: - db.reset() -rep = Reporter(db) -if opts.files: - FMTMAP[opts.files](rep.file) -for dir in args[1:]: - enum_walk(dir, rep.file) -if opts.all: - db.prune() -db.flush() +if opts.udiff: + if opts.cache is None or opts.all or opts.files or len(args) > 2: + die("incompatible options: `-u' requires `-c CACHE', forbids others") + db = HashCache(opts.cache, opts.hash) + if len(args) == 2: OS.chdir(args[1]) + good = True + if not clear_cache(db): good = False + if good: db.flush() + else: exit(2) +else: + if not opts.files and len(args) <= 1: + die("no filename sources: nothing to do") + db = HashCache(opts.cache, opts.hash) + if opts.all: + db.reset() + rep = Reporter(db) + if opts.files: + FMTMAP[opts.files](rep.file) + for dir in args[1:]: + enum_walk(dir, rep.file) + if opts.all: + db.prune() + db.flush() ###----- That's all, folks --------------------------------------------------