From 99248ed2ee402c196c3e83a19cfe6c1fa1c3cf98 Mon Sep 17 00:00:00 2001
From: Mark Wooding <mdw@distorted.org.uk>
Date: Sun, 18 Dec 2011 20:27:33 +0000
Subject: [PATCH] initial checkin; mostly complete

---
 .gitignore        |   6 +
 .links            |   3 +
 LAYOUT            | 137 +++++++++
 Makefile.am       | 177 +++++++++++
 bkp.dump.in       |  55 ++++
 bkp.in            | 328 ++++++++++++++++++++
 bkpacct           | 102 +++++++
 bkpadmin.8        |  88 ++++++
 bkpadmin.in       | 887 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bkpfunc.sh        |  99 ++++++
 configure.ac      |  57 ++++
 lvm-rmsnap.8      | 124 ++++++++
 lvm-rmsnap.in     | 462 ++++++++++++++++++++++++++++
 rfreezefs.8       | 317 +++++++++++++++++++
 rfreezefs.c       | 633 ++++++++++++++++++++++++++++++++++++++
 rmt.c             | 449 +++++++++++++++++++++++++++
 snap.8.in         | 172 +++++++++++
 snap.in           | 150 +++++++++
 snap.lvm          | 105 +++++++
 snap.lvm.8        | 104 +++++++
 snap.rfreezefs.8  |  76 +++++
 snap.rfreezefs.in | 174 +++++++++++
 snap.ro           | 133 ++++++++
 snap.ro.8         |  69 +++++
 snap.trivial      |  82 +++++
 snap.trivial.8    |  39 +++
 snaptab.5.in      | 305 +++++++++++++++++++
 27 files changed, 5333 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .links
 create mode 100644 LAYOUT
 create mode 100644 Makefile.am
 create mode 100755 bkp.dump.in
 create mode 100755 bkp.in
 create mode 100755 bkpacct
 create mode 100644 bkpadmin.8
 create mode 100755 bkpadmin.in
 create mode 100755 bkpfunc.sh
 create mode 100644 configure.ac
 create mode 100644 lvm-rmsnap.8
 create mode 100755 lvm-rmsnap.in
 create mode 100644 rfreezefs.8
 create mode 100644 rfreezefs.c
 create mode 100644 rmt.c
 create mode 100644 snap.8.in
 create mode 100755 snap.in
 create mode 100755 snap.lvm
 create mode 100644 snap.lvm.8
 create mode 100644 snap.rfreezefs.8
 create mode 100755 snap.rfreezefs.in
 create mode 100755 snap.ro
 create mode 100644 snap.ro.8
 create mode 100755 snap.trivial
 create mode 100644 snap.trivial.8
 create mode 100644 snaptab.5.in

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ab9943e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+COPYING
+Makefile.in
+aclocal.m4
+autom4te.cache
+config
+configure
diff --git a/.links b/.links
new file mode 100644
index 0000000..f1dbbb1
--- /dev/null
+++ b/.links
@@ -0,0 +1,3 @@
+config/auto-version
+config/confsubst
+COPYING
diff --git a/LAYOUT b/LAYOUT
new file mode 100644
index 0000000..55c51d4
--- /dev/null
+++ b/LAYOUT
@@ -0,0 +1,137 @@
+Backup volume layout
+
+* Volume group structure
+
+Each backup volume group is named with a /tag/ to distinguish it from
+others.  A backup volume group is named =bkp-TAG=.  It has two logical
+volumes named =meta= and =crypt=.  The =meta= volume contains a small
+unencrypted ext2 filesystem; the =crypt= volume contains a much larger
+LUKS-encrypted ext2 filesystem.
+
+
+* The metadata volume
+
+The =meta= volume contains metadata about the encrypted volume.  The
+root directory of the volume should have a directory =cur= containing
+the following files.
+
+  + =blob= :: A *seccure*-encrypted copy of the LUKS `passphrase' for
+    the encrypted volume.  The decryption key is =priv/backup-disk=.
+    The `passphrase' is raw binary data; currently this is 512 bytes,
+    though this isn't part of the specification.
+
+  + =keys.tgz= :: A partial archive of the =/etc/keys/= directory;
+    specifically, this contains the =pub= and =recov= directories, and
+    the =README= file.
+
+  + =passwd= and =group= :: Fragments of the server's *passwd* and
+    *group* files, which can be used to decode the uid and gid numbers
+    in the volumes.
+
+  + =hashes= :: A *sha256sum*-format list of the hashes of the other
+    files.
+
+  + =hashes.sig= :: A *seccure* signature on the =hashes= file, which
+    can be verified using the key =pub/backup-auth.pub=.
+
+Files in =cur= other than =hashes= and =hashes.sig= which are not listed
+in the =hashes= file are spurious and should not be trusted.
+
+There may be a =new= directory in the volume root; this contains a
+partially written replacement for =cur=.  This replacement is performed
+as follows.
+
+  + Delete =new= if it already exists.
+  + Create =new= and populate it with the appropriate files.
+  + Rename =cur= to =old=.
+  + Rename =new= to =cur=.
+  + Delete =old=.
+
+There is a point in this process when the =cur= directory does not
+exist: there are then =old= and =new= directories, and /both/ of them
+contain valid files.  The tools provided do not handle this situation;
+it must be fixed manually: one of the directories must be renamed to
+=cur= and the other deleted.
+
+It is possible to specify a properly atomic update protocol, but this
+doesn't seem worth the additional complexity of fiddling with symbolic
+links and the more awkward recovery procedure.
+
+
+* The encrypted volume
+
+The =crypt= volume contains archived assets arranged in a hierarchy.
+(An `asset' is a thing that needs backing up.  It's a bit more general
+than just a filesystem, since I also want to back up things like
+databases which are rather weird.)
+
+The topmost level splits the archive by hostname; the second level
+splits a host's assets by asset name.
+
+The third level splits out the dumps of an asset by date: each directory
+is named =YYYY-MM-DD#N.L=, indicating the date on which the dump was
+taken, and the dump level.  The number =N= is a counter to distinguish
+multiple dumps taken on the same day.  The number =L= (`level') is an
+integer which explains how to combine the dump with earlier dumps to
+perform a complete restore of the asset: a level-zero dump is complete;
+a level-$n$ dump contains everything since the previous level-$n$ or
+lower dump.  The algorithm to restore up to a level-$n$ dump taken at a
+time $t_1$ is therefore as follows.
+
+  1. Identify the most recent level-0 dump prior to $t_1$, and restore
+     it.  Let $t$ be the time of that level-0 dump.
+
+  2. Identify the lowest numbered dump level occurring after $t$ and
+     before or at $t_1$; let $m$ be this level.  Restore all of these
+     level-$m$ dumps, in order.
+
+  3. If $m = n$ then the restore is complete.  Otherwise update $t$ to
+     be the time of the most recent level-$m$ dump prior to $t_1$ and go
+     back to step 2.
+
+The third-level directory contains these files:
+
+  + =hashes= :: A *sha256sum*-format list of the hashes of the dump
+    files.
+
+  + =hashes.sig= :: A *seccure* signature on the =hashes= file, which
+    can be verified using the key =pub/backup-auth.pub=.
+
+It also contains other files which are specific to the kind of asset
+being stored.  All of these files should be listed in the =hashes= file;
+there should be no other files present.
+
+In addition to the date/level directories, the third level may also have
+a directory =prepare=, which contains a partial dump in progress and
+various bits of metadata about it.  The contents of this directory are
+not specified, and should not be trusted.  Finally, there may be a
+directory =failed= which contains archive directories as above, but
+these directories are incomplete, and retained for diagnostic purposes.
+
+
+* Users and groups
+
+Each host is assigned a user and a group, both named =bkp-HOST=; each of
+the users is also a member of the group =backup=.  All of the permanent
+files and directories in the encrypted volume are owned by =root=.  All
+of the permanent directories within a host's tree are owned by =root=
+and group-owned by the host's group, and have mode 2755; the files
+within a dump are group-owned by the relevant host's group, with
+mode 640.  Any =failed= directories are owned and group-owned by =root=
+and have mode 2755; the partial archives within are owned and
+group-owned by =root= and have mode 640.  Any =prepare= directories
+have the usual permissions, but files and directories within them may have
+other permissions, and may be under hostile control.
+
+This structure is designed to protect existing archives from hosts which
+are later compromised.  No special precautions against attackers having
+open files are taken while fixing up the permissions on a completed
+dump, since the relevant attackers could just as easily have corrupted
+the dump earlier.
+
+
+* COMMENT Emacs cruft
+
+# Local variables:
+# mode: org
+# End:
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..d27c011
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,177 @@
+### -*-makefile-*-
+###
+### Build script for distorted.org.uk backup system
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+bin_PROGRAMS		 =
+sbin_PROGRAMS		 =
+bin_SCRIPTS		 =
+sbin_SCRIPTS		 =
+noinst_SCRIPTS		 =
+
+pkglibexec_PROGRAMS	 =
+pkglib_DATA		 =
+
+man_MANS		 =
+dist_man_MANS		 =
+
+EXTRA_DIST		 =
+CLEANFILES		 =
+DISTCLEANFILES		 =
+
+snaplibexecdir		 = $(libexecdir)/snap
+snaplibexec_SCRIPTS	 =
+
+bkplibexecdir		 = $(libexecdir)/bkp
+bkplibexec_SCRIPTS	 =
+
+AM_CFLAGS		 = $(mLib_CFLAGS) $(nettle_CFLAGS)
+
+###--------------------------------------------------------------------------
+### Substitution of configuration data.
+
+confsubst = $(top_srcdir)/config/confsubst
+EXTRA_DIST		+= config/confsubst
+
+SUBSTVARS = \
+        PACKAGE="$(PACKAGE)" VERSION="$(VERSION)" \
+	PERL="$(PERL)" \
+        prefix="$(prefix)" exec_prefix="$(exec_prefix)" \
+	sbindir="$(sbindir)" \
+        sysconfdir="$(sysconfdir)" \
+        pkglibdir="$(pkglibdir)" \
+	bkplibexecdir="$(bkplibexecdir)" \
+	snaplibexecdir="$(snaplibexecdir)" \
+	pkglibexecdir="$(pkglibexecdir)"
+
+SUBST = $(AM_V_GEN)$(confsubst)
+
+###--------------------------------------------------------------------------
+### Snapshot tools.
+
+## Remote filesystem freezing tool.
+sbin_PROGRAMS		+= rfreezefs
+dist_man_MANS		+= rfreezefs.8
+rfreezefs_SOURCES	 = rfreezefs.c
+rfreezefs_LDADD		 = $(mLib_LIBS)
+
+## LVM snapshot removal nanny.
+sbin_SCRIPTS		+= lvm-rmsnap
+dist_man_MANS		+= lvm-rmsnap.8
+EXTRA_DIST		+= lvm-rmsnap.in
+CLEANFILES		+= lvm-rmsnap
+lvm-rmsnap: lvm-rmsnap.in Makefile
+	$(SUBST) $(srcdir)/lvm-rmsnap.in $(SUBSTVARS) >lvm-rmsnap.new && \
+		chmod +x lvm-rmsnap.new && mv lvm-rmsnap.new lvm-rmsnap
+
+## Snapshot tool.
+sbin_SCRIPTS		+= snap
+EXTRA_DIST		+= snap.in
+CLEANFILES		+= snap
+snap: snap.in Makefile
+	$(SUBST) $(srcdir)/snap.in $(SUBSTVARS) >snap.new && \
+		chmod +x snap.new && mv snap.new snap
+
+man_MANS		+= snap.8
+EXTRA_DIST		+= snap.8.in
+CLEANFILES		+= snap.8
+snap.8: snap.8.in Makefile
+	$(SUBST) $(srcdir)/snap.8.in $(SUBSTVARS) >snap.8.new && \
+		mv snap.8.new snap.8
+
+man_MANS		+= snaptab.5
+EXTRA_DIST		+= snaptab.5.in
+CLEANFILES		+= snaptab.5
+snaptab.5: snaptab.5.in Makefile
+	$(SUBST) $(srcdir)/snaptab.5.in $(SUBSTVARS) >snaptab.5.new && \
+		mv snaptab.5.new snaptab.5
+
+## Snapshot handlers.
+snaplibexec_SCRIPTS	+= snap.lvm
+dist_man_MANS		+= snap.lvm.8
+EXTRA_DIST		+= snap.lvm
+
+snaplibexec_SCRIPTS	+= snap.ro
+dist_man_MANS		+= snap.ro.8
+EXTRA_DIST		+= snap.ro
+
+snaplibexec_SCRIPTS	+= snap.trivial
+dist_man_MANS		+= snap.trivial.8
+EXTRA_DIST		+= snap.trivial
+
+snaplibexec_SCRIPTS	+= snap.rfreezefs
+dist_man_MANS		+= snap.rfreezefs.8
+EXTRA_DIST		+= snap.rfreezefs.in
+CLEANFILES		+= snap.rfreezefs
+snap.rfreezefs: snap.rfreezefs.in Makefile
+	$(SUBST) $(srcdir)/snap.rfreezefs.in $(SUBSTVARS) \
+			>snap.rfreezefs.new && \
+		chmod +x snap.rfreezefs.new && \
+		mv snap.rfreezefs.new snap.rfreezefs
+
+###--------------------------------------------------------------------------
+### Backup utilities.
+
+## Hashing rmt clone.
+pkglibexec_PROGRAMS	+= rmt
+rmt_SOURCES		 = rmt.c
+rmt_LDADD		 = $(mLib_LIBS) $(nettle_LIBS)
+
+## Backup archive administration tool.
+sbin_SCRIPTS		+= bkpadmin
+##dist_man_MANS		+= bkpadmin.8
+EXTRA_DIST		+= bkpadmin.in
+CLEANFILES		+= bkpadmin
+bkpadmin: bkpadmin.in Makefile
+	$(SUBST) $(srcdir)/bkpadmin.in $(SUBSTVARS) >bkpadmin.new && \
+		chmod +x bkpadmin.new && mv bkpadmin.new bkpadmin
+
+## Backup client driver.
+sbin_SCRIPTS		+= bkp
+##dist_man_MANS		+= bkp.8
+EXTRA_DIST		+= bkp.in
+CLEANFILES		+= bkp
+bkp: bkp.in Makefile
+	$(SUBST) $(srcdir)/bkp.in $(SUBSTVARS) >bkp.new && \
+		chmod +x bkp.new && mv bkp.new bkp
+
+## Type handler utility library.
+pkglib_DATA		+= bkpfunc.sh
+EXTRA_DIST		+= bkpfunc.sh
+
+## Backup type handlers.
+bkplibexec_SCRIPTS	+= bkp.dump
+##dist_man_MANS		+= bkp.dump.8
+EXTRA_DIST		+= bkp.dump.in
+CLEANFILES		+= bkp.dump
+bkp.dump: bkp.dump.in Makefile
+	$(SUBST) $(srcdir)/bkp.dump.in $(SUBSTVARS) >bkp.dump.new && \
+		chmod +x bkp.dump.new && mv bkp.dump.new bkp.dump
+
+###--------------------------------------------------------------------------
+### Distribution.
+
+EXTRA_DIST		+= config/auto-version
+
+dist-hook::
+	echo $(VERSION) >$(distdir)/RELEASE
+
+###----- That's all, folks --------------------------------------------------
diff --git a/bkp.dump.in b/bkp.dump.in
new file mode 100755
index 0000000..bb6daa4
--- /dev/null
+++ b/bkp.dump.in
@@ -0,0 +1,55 @@
+#! /bin/sh
+### -*-sh-*-
+###
+### Backup ext[2-4] filesystem using dump
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+set -e
+. @pkglibdir@/bkpfunc.sh
+
+## Check arguments and environment; give up if TAG or DEVICE is missing.
+preflight
+case $# in 0 | 1) echo >&2 "usage: $QUIS TAG DEVICE DUMPARGS ..."; exit 1 ;; esac
+tag=$1 device=$2; shift 2
+
+## Transform the date format.
+since=$(datefmt "%a %b %e %H:%M:%S %Y %z" "$BKP_LASTDATE")
+
+## Make a snapshot and dump it; always release the snapshot afterwards.
+snap=$(snap "$device" tag=dump)
+set +e
+run \
+  env RSH=ssh RMT="$BKP_RMT" \
+  dump -${BKP_LEVEL}q -z9 -T"$since" \
+    -f"$BKP_HOST:$BKP_ASSET/$tag.dump" -Q"$tag.qfa" \
+    "$@" \
+    "/dev/$snap"
+rc=$?; set -e
+snap -u "$device" tag=dump
+case $rc in 0) ;; *) exit $rc ;; esac
+
+scp "$BKP_HOST:$BKP_TARGET/$tag.dump.hash" "$tag.dump.hash"
+hash=$(cat "$tag.dump.hash")
+ssh "$BKP_HOST" rm "$BKP_TARGET/$tag.dump.hash"
+scp "$tag.qfa" "$BKP_HOST:$BKP_TARGET/$tag.qfa"
+bkpadmin hash "$BKP_ASSET" "$tag.dump" "$hash"
+
+###----- That's all, folks --------------------------------------------------
diff --git a/bkp.in b/bkp.in
new file mode 100755
index 0000000..050319b
--- /dev/null
+++ b/bkp.in
@@ -0,0 +1,328 @@
+#! @PERL@
+### -*-perl-*-
+###
+### Run backups as instructed by a configuration file
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+use Data::Dumper;
+use Errno qw(:POSIX);
+use Fcntl qw(:mode);
+use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
+use IO::Handle;
+use MIME::Base64;
+use POSIX;
+use Text::ParseWords;
+
+our $VERSION = "@VERSION@";
+
+our %C = ( etc		=> "@sysconfdir@",
+	   sbin		=> "@sbindir@",
+	   libexec	=> "@pkglibexecdir@",
+	   bkp		=> "@bkplibexecdir@" );
+
+###--------------------------------------------------------------------------
+### Utilities.
+
+our $EVAL = 0;
+(our $QUIS = $0) =~ s:^.*/::;
+sub whine ($) { my ($msg) = @_; print STDERR "$QUIS: $msg\n"; }
+sub fail ($) {
+  my ($msg) = @_;
+  if ($EVAL) { die $msg . "\n"; }
+  else { whine $msg; exit $! || ($? >> 8) || 255; }
+}
+## Run BODY with `fail' throwing; any error is left in `$@' for the caller.
+sub try (&) { my ($body) = @_; local $EVAL = 1; eval { &$body (); }; }
+
+sub decodewait ($) {
+  my ($status) = @_;
+  ## Describe the wait status STATUS: `ok', the fatal signal number, or
+  ## the exit code.
+  return "ok" unless $status;
+  return sprintf "signal %d", $status & 127 if $status & 255;
+  return sprintf "rc = %d", $status >> 8;
+}
+
+sub shellquote ($) {
+  my ($text) = @_;
+  ## Wrap TEXT in single quotes for the shell, rewriting each embedded
+  ## single quote so that it survives.
+  (my $quoted = $text) =~ s/'/'\\''/g;
+  return "'$quoted'";
+}
+
+sub run ($@) {
+  my ($what, @args) = @_;
+  ## Run a program with ARGS, collecting and returning its output.
+
+  open my $f, "-|", @args or fail "open pipe ($what): $!";
+  chomp (my @out = <$f>);
+  if (!close $f) {
+    $? and fail "$what failed: " . decodewait $?;
+    fail "close pipe ($what)";
+  }
+  return wantarray ? @out : $out[0];
+}
+
+sub now () {
+  ## Report the current time.
+
+  return strftime "%Y-%m-%d %H:%M:%S %z", localtime;
+}
+
+###--------------------------------------------------------------------------
+### Parse command line.
+
+our $USAGE = "usage: $QUIS [-n] [-a ASSET] [-c FILE] [KEY=VALUE ...]";
+sub version { print "$QUIS, version $VERSION\n"; }
+sub help {
+  print <<EOF;
+$USAGE
+
+Options:
+  -h, --help		Show this help text.
+  -v, --version		Show the program version number.
+  -a, --asset=ASSET	Back up ASSET, rather than all assets.
+  -c, --config=FILE	Use configuration FILE, not $CONF.
+  -n, --noact		Don't actually run the dump programs.
+EOF
+}
+
+our $CONF = "$C{etc}/bkptab";
+our @ASSET = ();
+our $NOACT = 0;
+GetOptions('help|h|?'		=> sub { version; help; exit; },
+	   'version|v'		=> sub { version; exit; },
+	   'asset|a=s'		=> \@ASSET,
+	   'config-file|c=s'	=> \$CONF,
+	   'noact|n'		=> \$NOACT)
+  or do { print STDERR $USAGE, "\n"; exit 1; };
+
+###--------------------------------------------------------------------------
+### Parse the configuration file.
+
+our %OVERRIDE = ();
+our %SECMAP = ( CONFIG	=> sub {
+		  my ($k, $v) = @_;
+		  $C{$k} = $v unless $OVERRIDE{$k};
+		} );
+
+our %DUMP = ();
+our @ORDER = ();
+our %ASSET = map { $_ => 1 } @ASSET;
+
+## Override configuration from the environment.
+while (my ($e, $v) = each %ENV) {
+  next unless $e =~ /^BKP_([_A-Za-z0-9]+)$/;
+  (my $k = $1) =~ tr/_A-Z/-a-z/;
+  $C{$k} = $v;
+  $OVERRIDE{$k} = 1;
+}
+
+## Handy sub for extracting a configuration variable.
+sub config ($) {
+  my ($k) = @_;
+  exists $C{$k} or fail "$CONF:$.: unknown config variable `$k'";
+  return $C{$k};
+}
+
+## Parse the configuration file proper.
+open CF, "<", $CONF or fail "open config ($CONF): $!";
+my $kvfunc = $SECMAP{CONFIG};
+while (my $line = <CF>) {
+
+  ## Handle continuation lines and comments.
+  chomp $line;
+  while ($line =~ /\\\s*$/) {
+    chomp (my $more = <CF>);
+    $line =~ s/\\\s*$/$more/;
+  }
+  next if $line =~ /^\s*([#;]|$)/;
+
+  if ($line =~ /^\s*\[\s*(\S.*\S|\S|)\s*\]\s*/) {
+    ## Section header: set the kvfunc appropriately.
+
+    if (exists $SECMAP{$1}) {
+      $kvfunc = $SECMAP{$1};
+    } elsif (!@ASSET || $ASSET{$1}) {
+      my $asset = $1;
+      if (!exists $DUMP{$asset}) {
+	$DUMP{$asset} = [];
+	push @ORDER, $asset;
+      }
+      $kvfunc = sub {
+	my ($k, $v) = @_;
+	push @{$DUMP{$asset}}, [$k, shellwords $v];
+      }
+    } else {
+      $kvfunc = sub { };
+    }
+  } elsif ($line =~ /^\s*(\S.*?\S|\S)\s*[=:]\s*(\S.*\S|\S|)\s*$/) {
+    ## Assignment line.  The key match is non-greedy so that the *first*
+    ## `=' or `:' is the separator; pass on the value after expansion.
+    my ($k, $v) = ($1, $2);
+    $v =~ s/\$\{([^}]+)\}/config $1/ge;
+    $kvfunc->($k, $v);
+  } else {
+    ## Something else: it's an error.
+
+    fail "$CONF:$.: unrecognized line";
+  }
+}
+
+## Done.
+close CF or fail "close config ($CONF): $!";
+@ORDER or fail "no matching assets to dump";
+
+## Export the configuration: key `foo-bar' becomes variable `BKP_FOO_BAR'.
+while (my ($k, $v) = each %C) {
+  next unless $k =~ /^[-A-Za-z0-9]+$/;
+  (my $e = $k) =~ tr/-a-z/_A-Z/;
+  $ENV{"BKP_$e"} = $v;
+}
+
+###--------------------------------------------------------------------------
+### Establish a safe temporary directory.
+
+sysopen RAND, "/dev/urandom", O_RDONLY or fail "open (random): $!";
+our $TMPDIR;	# set only once we own the directory: the END block removes it
+for (my $i = 0; !defined $TMPDIR && $i < 1000; $i++) {
+  my $n = sysread RAND, my $buf, 12;
+  if (!defined $n) { fail "read (random): $!"; }
+  elsif ($n < 12) { fail "short read (random)"; }
+  ## Base64 output may contain `/', which mustn't appear in a filename.
+  (my $rand = encode_base64 ($buf, "")) =~ tr:/:_:;
+  my $dir = ($ENV{TMPDIR} // "/tmp") . "/bkp.$$.$rand";
+  if (mkdir $dir, 0700) { $TMPDIR = $dir; }
+  elsif ($! != EEXIST) { fail "mkdir ($dir): $!"; }
+}
+defined $TMPDIR or fail "failed to make temp directory";
+$ENV{BKP_TMPDIR} = $TMPDIR;
+END { chdir "/"; system "rm", "-rf", $TMPDIR if defined $TMPDIR; }
+close RAND;
+
+chdir $TMPDIR or fail "chdir ($TMPDIR): $!";
+
+###--------------------------------------------------------------------------
+### Wade through the list of things to do, dumping assets.
+
+sub bkpadmin ($@) {
+  my ($op, @args) = @_;
+  ## Invoke an administration operation.
+
+  return run "bkpadmin $op",
+    "ssh", $C{host},
+    join " ", map { shellquote $_ } qw(userv root bkpadmin), $op, @args;
+}
+
+## Make sure there's a volume mounted.
+bkpadmin "mount";
+
+## Go through each asset dumping all of the tags.
+for my $asset (@ORDER) {
+
+  ## Start a log for this asset.
+  if ($NOACT) {
+    open LOG, ">&", STDERR or fail "dup stderr (log)";
+  } else {
+    open LOG, ">", "$asset.log" or fail "open ($asset.log): $!";
+  }
+
+  ## Find out when the last dump was done.
+  my ($level, $date, $time, $tz) = split " ", bkpadmin "level", $asset;
+  $ENV{BKP_LEVEL} = $level;
+  $ENV{BKP_LASTDATE} = my $lastdate = "$date $time $tz";
+  $ENV{BKP_ASSET} = $asset;
+
+  ## Prepare the dump.
+  unless ($NOACT) {
+    my $target = bkpadmin "prep", $asset, $level;
+    $ENV{BKP_TARGET} = $target;
+  }
+
+  ## Make sure we can dispose of the results if there's a Perl failure
+  ## somewhere here.
+  try {
+
+    ## Start writing the log.
+    printf LOG "%s: Commence dump of asset `%s' at level %d (since %s)\n",
+      now, $asset, $level, $lastdate;
+
+    ## Dump the individual tags.
+    my $lose = 0;
+    for my $dump (@{$DUMP{$asset}}) {
+      my ($tag, $type, @args) = @$dump;
+
+      ## Make a log note.
+      printf LOG "%s: Dump tag `%s' (%s) begins\n", now, $tag, $type;
+      flush LOG or fail "write ($asset.log): $!";
+
+      ## Run the dump helper.  The child must neither unwind into the
+      ## enclosing `try' nor run our `END' cleanup, so it uses `_exit'.
+      if ($NOACT) { $? = 0; }
+      else {
+	defined (my $kid = fork) or fail "fork: $!";
+	unless ($kid) {
+	  unless (open STDOUT, ">&", LOG and open STDERR, ">&", LOG)
+	    { whine "dup: $!"; POSIX::_exit (127); }
+	  { exec "$C{bkp}/bkp.$type", "$tag", @args; }
+	  whine "exec (bkp.$type): $!"; POSIX::_exit (127);
+	}
+	waitpid ($kid, 0) == $kid or fail "waitpid: $!";
+      }
+
+      ## Deal with the aftermath.
+      if ($?) {
+	printf LOG "%s: Dump tag `%s' failed (%s)\n", now, $tag,
+	  decodewait $?;
+	printf STDERR "%s: %s: Dump asset `%s' tag `%s' FAILED\n",
+	  $QUIS, now, $asset, $tag;
+	$lose++;
+      } elsif ($NOACT) {
+	printf LOG "%s: Dump tag `%s' not performed (--noact)\n", now, $tag;
+      } else {
+	printf LOG "%s: Dump tag `%s' ok\n", now, $tag;
+      }
+    }
+
+    ## Report completion of the asset.
+    printf LOG "%s: Dump of asset `%s' completed %s\n", now, $asset,
+      $lose == 0 ? "successfully" : "with $lose failures";
+    error LOG and fail "write ($asset.log): $!";
+    close LOG or fail "close ($asset.log): $!";
+
+    ## Copy the log to the server (target is in `BKP_TARGET') and commit it.
+    unless ($NOACT) {
+      run "scp $asset.log",
+	"scp", "$asset.log", "$C{host}:$ENV{BKP_TARGET}/$asset.log";
+      bkpadmin $lose ? "fail" : "commit", $asset;
+    }
+  };
+
+  ## If anything failed above, then try to mark the asset as a failure and
+  ## abort.  Save the message first: the nested `try' may overwrite `$@'.
+  if (my $err = $@) {
+    try { bkpadmin "fail", $asset; };
+    chomp $err; fail $err;
+  }
+}
+
+###----- That's all, folks --------------------------------------------------
diff --git a/bkpacct b/bkpacct
new file mode 100755
index 0000000..7ca97da
--- /dev/null
+++ b/bkpacct
@@ -0,0 +1,102 @@
+#! /bin/sh
+
+set -e
+
+quis=${0##*/}
+
+usage="usage: $quis [-nqv] HOST ..."
+
+verbose=nil
+noact=nil
+while getopts "hnvq" opt; do
+  case "$opt" in
+    h) echo "$usage"; exit ;;
+    n) noact=t verbose=t ;;
+    v) verbose=t ;;
+    q) verbose=nil ;;
+    *) echo >&2 "$usage"; exit 1 ;;
+  esac
+done
+shift $(( $OPTIND - 1 ))
+
+case $# in 0) echo "$usage"; exit 1 ;; esac
+
+defrun='
+run () {
+  case $verbose in t) echo >&2 "- $*" ;; esac
+  case $noact in nil) "$@" ;; esac
+}'
+eval "$defrun"
+
+if getent group backup >/dev/null; then
+  echo >&2 "$quis: group \`backup' already exists"
+else
+  run addgroup --gid 200 backup
+fi
+
+for host in "$@"; do
+
+  if getent passwd bkp-$host >/dev/null; then
+    echo >&2 "$quis: backup user \`bkp-$host' already exists"
+  else
+    uid=201
+    while { getent passwd $uid || getent group $uid; } >/dev/null; do
+      uid=$(( $uid + 1 ))
+    done
+    run addgroup --system --gid $uid bkp-$host
+    run adduser --system --uid $uid --gid $uid \
+      --home /var/lib/bkp/$host \
+      --shell /bin/bash \
+      --gecos "Backup user for host $host" \
+      --disabled-password \
+      bkp-$host
+  fi
+  ## Add the user to `backup' unless already listed among its members.
+  getent group backup | {
+    IFS=: read name passwd gid members
+    case ",$members," in
+      *",bkp-$host,"*)
+	echo >&2 "$quis: user \`bkp-$host' already in group \`backup'"
+	;;
+      *)
+	run adduser bkp-$host backup
+	;;
+    esac
+  }
+
+  settings="verbose=$verbose noact=$noact"
+  run mkdir -p -m755 /var/lib/bkp/$host/.ssh
+  ssh root@$host "$settings; $defrun" '
+	cd $HOME
+	mkdir -p -m755 .ssh
+	cd .ssh
+	if [ ! -f id_rsa.pub ]; then
+	  genp=t
+	else
+	  genp=$(
+	    ssh-keygen -l -fid_rsa.pub | {
+	      read bits fpr fname type
+	      case "$bits,$type" in
+		*[!0-9]*,*)
+		  echo t
+		  ;;
+		*,"(RSA)")
+		  if [ $bits -ge 3072 ]; then echo nil; else echo t; fi
+		  ;;
+		*)
+		  echo t
+		  ;;
+	      esac
+	    }
+	  )
+	fi
+
+	case $genp in
+	  t)
+	    run ssh-keygen -trsa -fid_rsa -b3072 -N ""
+	    ;;
+	esac
+  '
+  run scp root@$host:.ssh/id_rsa.pub /var/lib/bkp/$host/.ssh/authorized_keys
+
+done
diff --git a/bkpadmin.8 b/bkpadmin.8
new file mode 100644
index 0000000..63aa340
--- /dev/null
+++ b/bkpadmin.8
@@ -0,0 +1,88 @@
+.TH bkpadmin 8 "28 November 2011" "distorted.org.uk backup"
+.SH NAME
+bkpadmin \- backup archive administration
+.SH SYNOPSIS
+.B bkpadmin
+.I command
+.RI [ argument ...]
+.PP
+Commands recognized:
+.PP
+.B help
+.br
+.B initvol
+.I tag
+.I device
+.br
+.B mount
+.RI [ tag ]
+.br
+.B umount
+.br
+.B initmeta
+.br
+.B chkmeta
+.br
+.B prep
+.I asset
+.I level
+.RI [ date
+.I time
+.IR tz ]
+.br
+.B abort
+.I asset
+.br
+.B fail
+.I asset
+.br
+.B level
+.I asset
+.br
+.B hash
+.I asset
+.I file
+.I hash
+.br
+.B commit
+.I asset
+.br
+.B check
+.I asset
+.I label
+.br
+.B catalogue
+.I asset
+.br
+.B outdated
+.I asset
+.br
+.B test
+.I command
+.RI [ argument ...]
+.SH DESCRIPTION
+The
+.B bkpadmin
+command assists with the maintenance of disk backup volumes.  A backup
+volume contains a hierarchy of backed up data, as follows.  The volume
+stores data for a number of client
+.IR hosts .
+Each host has a number of
+.I assets
+which need to be backed up.  Each time an asset is backed up, the backup
+files are collected together and assigned a
+.IR label .
+.PP
+The
+.I host
+names don't actually need to correspond to actual hosts, though that's
+the intent.  Rather, they correspond to mutually untrusting sources of
+backup data.  Each host is represented locally by a user named
+.BI bkp- host \fR.
+
+
+
+.SH BUGS
+There's far too much local policy embedded in here: LVM volume naming,
+user naming, key management, and so on.  Splitting this out would be a
+really good idea, though probably not for the faint of heart.
diff --git a/bkpadmin.in b/bkpadmin.in
new file mode 100755
index 0000000..ce7e7cb
--- /dev/null
+++ b/bkpadmin.in
@@ -0,0 +1,887 @@
+#! /bin/sh
+###
+### Manage the backup archive structure
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+set -e
+
+## Configuration and testing.
+: ${BKP=/mnt/bkp} ${META=/mnt/bkpmeta}
+: ${KEYS=/etc/keys}
+
+case $(id -u) in 0) ;; *) exec userv root bkpadmin "$@" ;; esac
+
+###--------------------------------------------------------------------------
+### Common utilities.
+
+quis=${0##*/}
+version="@VERSION@"
+
+moan () {
+  ## Print a complaint to standard error.
+
+  echo >&2 "$quis: $*"
+}
+
+die () {
+  ## Print a complaint and exit.
+
+  moan "$*"
+  exit 1
+}
+
+cleanups=""
+addcleanup () {
+  cmd=$1
+  ## Add a cleanup command CMD to the list.
+
+  case "$cleanups" in
+    ?*)
+      ;;
+    *)
+      trap 'rc=$?; for c in $cleanups; do $c; done; exit $rc' \
+	EXIT INT TERM
+      ;;
+  esac
+  cleanups=${cleanups+$cleanups }$cmd
+}
+
+rmtmp () { case ${tmpdir+t} in t) rm -rf "$tmpdir" ;; esac }
+addcleanup rmtmp
+mktmp () {
+  ## Make a temporary directory and output its name.  The name is kept in
+  ## `tmpdir'; if that is already set then it is simply output again.
+  case "${tmpdir+t}" in
+    t) ;;
+    *)
+      i=0
+      while :; do
+	## Base64 output may contain `/', which can't appear in a filename.
+	r=$(openssl rand -base64 12); r=$(echo "$r" | tr / _)
+	tmpdir=${TMPDIR-/tmp}/$quis.$$.$r
+	if mkdir -m700 "$tmpdir" >/dev/null 2>&1; then break; fi
+	case $i in ???) die "failed to create temporary directory" ;; esac
+	i=$(( $i + 1 ))
+      done
+      ;;
+  esac
+  echo "$tmpdir"
+}
+
+###--------------------------------------------------------------------------
+### Command dispatch.
+
+case "${USERV_USER+t}" in t) uservp=t ;; *) uservp=nil ;; esac
+
+USAGE="COMMAND [ARGUMENT ...]"
+cmdname=""
+cmdargs=$USAGE
+cmds=""
+_defcmd () {
+  name=$1; shift; args=$*
+  ## Define a command unconditionally.
+
+  cmds="${cmds:+$cmds
+}$name $args"
+}
+
+defcmd () {
+  ## Define a command for privileged users only.
+
+  case $uservp in nil) _defcmd "$@" ;; esac
+}
+
+defucmd () {
+  ## Define a command usable via userv.
+
+  _defcmd "$@"
+}
+
+usage () {
+  ## Write a usage message for the current command.
+
+  echo "usage: $quis${cmdname:+ $cmdname}${cmdargs:+ $cmdargs}"
+}
+
+usage_err () {
+  ## Fail with a usage error.
+
+  usage >&2
+  exit 1
+}
+
+lookupcmd () {
+  cmd=$1
+  ## Try to look up the command CMD; on success, leave `cmdname' and
+  ## `cmdargs' set from the matching entry of `cmds'.  Otherwise die.
+  while read cmdname cmdargs; do
+    case $cmdname in "$cmd") return ;; esac
+  done <<EOF
+$cmds
+EOF
+  die "unknown command \`$cmd'"
+}
+
+defucmd help
+cmd_help () {
+  case $# in 0) ;; *) usage_err ;; esac
+
+  cat <<EOF
+$quis, version $version
+
+usage: $quis $USAGE
+
+Commands provided:
+EOF
+  while read cmd args; do
+    echo "	$cmd${args:+ $args}"
+  done <<EOF
+$cmds
+EOF
+}
+
+###--------------------------------------------------------------------------
+### Utility functions.
+
+sign () {
+  file=$1
+  ## Sign the named FILE, producing a signature FILE.sig.
+
+  seccure-sign -F$KEYS/priv/backup-auth -cp256 -s"$file.sig" <"$file"
+}
+
+checkhost () {
+  ## Check that a host is defined.
+
+  case "${host+t}" in
+    t) ;; *) die "no host defined (use \`-H')" ;;
+  esac
+}
+
+checkthing () {
+  thing=$1 good=$2 what=$3 string=$4
+  ## Check that STRING is a valid THING -- i.e., it only consists of GOOD
+  ## characters.
+
+  case "$string" in
+    *[!$good]*)
+      die "bad $thing \`$string' given for $what"
+      ;;
+  esac
+}
+
+checknum () {
+  what=$1 string=$2
+  ## Check that STRING is at least plausibly numeric.
+
+  checkthing number "0-9" "$what" "$string"
+}
+
+checkpath () {
+  what=$1 string=$2
+  ## Check that STRING is a plausible pathname.
+
+  case "$string" in
+    .* | */.* | *[!-a-zA-Z0-9.,_#!%^+=@/:]*)
+      die "bad pathname \`$string' given for $what"
+      ;;
+  esac
+}
+
+checkword () {
+  what=$1 thing=$2
+  ## Check that THING doesn't need shell quoting, and doesn't interfere with
+  ## other common delimiter characters.  (Colons aren't allowed because they
+  ## mess up /etc/passwd; slashes aren't allowed because they're directory
+  ## separators.  Leading dots aren't allowed either.  Hashes seem OK.)
+
+  checkthing word "-a-zA-Z0-9.,_#!%^+=@" "$what" "$string"
+}
+
+domkdir () {
+  dir=$1 owner=$2 mode=$3
+  ## Make a directory and set permissions on it.
+
+  mkdir -m755 "$dir"
+  chown $owner "$dir"
+  chmod $mode "$dir"
+}
+
+###--------------------------------------------------------------------------
+### Volume and volume group maintenance.
+
+currenttag () {
+  ## Output the tag of the mounted backup volume group.
+
+  dev=$(mntdev $BKP)
+  case "$dev" in
+    /dev/mapper/cbkp-*) echo "${dev#*-}"; return ;;
+    *) die "failed to parse tag from device name \`$dev'" ;;
+  esac
+}
+
+guesstag () {
+  ## Guess and print the tag of the available backup volume group.  If there
+  ## is not exactly one volume group available, print an error and fail.
+
+  LVM_SUPPRESS_FD_WARNINGS=t vgs @backup --noheadings -o name,attr | {
+    match=""
+    while read name attr; do
+      case "$name" in bkp-*) ;; *) continue ;; esac
+      case "$attr" in ??x*) continue ;; esac
+      match="$match${match:+ }${name#bkp-}"
+    done
+    case "x$match" in
+      x) die "no backup volume groups available" ;;
+      x*\ *) die "multiple backup volume groups available: $match" ;;
+    esac
+    echo "$match"
+  }
+}
+
+mntdev () {
+  dir=$1
+  ## Output a device name for the filesystem mounted on DIR.
+
+  dev=$(mountpoint -d "$dir")
+  devname=$(udevadm info --query=name --path="/dev/block/$dev")
+  case "$devname" in
+    dm-*)
+      devname=mapper/$(dmsetup info -c --noheadings -oname "/dev/$devname")
+      ;;
+  esac
+  echo "/dev/$devname"
+}
+
+mntmeta () {
+  tag=$1
+  ## Mount the metadata volume of the backup volume group named TAG.
+
+  if ! mountpoint -q $META; then
+    mount "/dev/bkp-$tag/meta" $META
+  fi
+}
+
+cryptkey () {
+  ## Decrypt and output the key for the encrypted volume.  This assumes that
+  ## the metadata volume is already mounted on /mnt/bkpmeta.
+
+  seccure-decrypt -q -m128 -cp256 -F$KEYS/priv/backup-disk <$META/cur/blob
+}
+
+decrypt () {
+  tag=$1
+  ## Decrypt but don't mount the encrypted volume of the backup volume group
+  ## named TAG.
+
+  mntmeta "$tag"
+  if [ ! -b "/dev/mapper/cbkp-$tag" ]; then
+    cryptkey | cryptsetup luksOpen --key-file=- \
+      "/dev/bkp-$tag/crypt" "cbkp-$tag"
+  fi
+}
+
+mntcrypt () {
+  tag=$1
+  ## Mount the encrypted subvolume of the backup volume group named TAG.  The
+  ## metadata volume will be mounted if necessary.
+
+  decrypt "$tag"
+  if ! mountpoint -q $BKP; then
+    mount "/dev/mapper/cbkp-$tag" $BKP
+  fi
+}
+
+umnt () {
+  ## Unmount a backup volume group: both the encrypted volume (on $BKP) and
+  ## the metadata volume (on $META) are unmounted, and the LUKS mapping for
+  ## the encrypted volume is closed if it was mounted when we started.
+  if mountpoint -q $BKP; then
+    tag=$(currenttag) cryptclosep=t
+  else
+    cryptclosep=nil
+  fi
+  for i in "$BKP" "$META"; do
+    if mountpoint -q "$i"; then umount "$i"; fi
+  done
+  case $cryptclosep in
+    t)
+      if [ -b "/dev/mapper/cbkp-$tag" ]; then
+	cryptsetup luksClose "cbkp-$tag"
+      fi
+  esac
+}
+
+defcmd initvol TAG DEVICE
+cmd_initvol () {
+  case $# in 2) ;; *) usage_err ;; esac
+  tag=$1 dev=$2
+
+  vgcreate --addtag @backup "bkp-$tag" "$dev"
+
+  lvcreate -L4M -nmeta "bkp-$tag"
+  mkfs -text2 -Lmeta "/dev/bkp-$tag/meta"
+  mntmeta "$tag"
+
+  mkdir -m755 $META/new
+  dd if=/dev/random bs=1 count=512 |
+  seccure-encrypt -m128 $(cat $KEYS/pub/backup-disk.pub) >$META/new/blob
+  mv $META/new $META/cur
+
+  lvcreate -l100%FREE -ncrypt "bkp-$tag"
+  cryptkey | cryptsetup luksFormat \
+    --cipher=twofish-xts-benbi:sha256 --hash=sha256 \
+    "/dev/bkp-$tag/crypt" -
+  decrypt "$tag"
+  mkfs -text2 -Lbackup -i1048576 "/dev/mapper/cbkp-$tag"
+  mntcrypt "$tag"
+}
+
+defucmd mount "[TAG]"
+cmd_mount () {
+  ## Mount volume TAG (default: guess).  TAG may come from a userv caller.
+  case $# in
+    0) tag=$(guesstag) check=nil ;;
+    1) tag=$1 check=t; checkword tag "$tag" ;;
+    *) usage_err ;;
+  esac
+  if ! mountpoint -q $BKP; then mntcrypt "$tag"; return; fi
+  curtag=$(currenttag)
+  case "$check,$curtag" in
+    "t,$tag") ;;
+    t*) die "volume \`$curtag' is mounted, not \`$tag'" ;;
+  esac
+}
+
+defcmd umount
+cmd_umount () {
+  ## Unmount the backup and metadata volumes; it's an error if neither of
+  ## the configured mount points ($BKP, $META) is currently mounted.
+  case $# in 0) ;; *) usage_err ;; esac
+  mntp=nil
+  for i in "$BKP" "$META"; do
+    if mountpoint -q "$i"; then mntp=t; fi
+  done
+  case $mntp in nil) die "backup volume not mounted" ;; esac
+  umnt
+}
+
+###--------------------------------------------------------------------------
+### Archive maintenance.
+
+checkdir () {
+  key=$1 dir=$2
+  ## Check a directory which has `hashes' and `hashes.sig' files.  KEY names
+  ## the public key file, relative to $KEYS.  Changes the working directory.
+  if ! seccure-verify -q -i"$dir/hashes" -- \
+    $(cat "$KEYS/$key") $(cat "$dir/hashes.sig")
+  then
+    die "failed to verify signature for \`$dir'"
+  fi
+
+  cd "$dir"
+  sha256sum --quiet -c hashes
+  ## Check that the files present are exactly the ones listed in `hashes'.
+  tmpdir=$(mktmp)
+  find . -type f -print | sed 's:^\./::' | sort >"$tmpdir/present"
+  { echo hashes
+    echo hashes.sig
+    sed 's/^[a-f0-9]* [* ]//' hashes
+  } | sort >"$tmpdir/checked"
+  cd "$tmpdir"
+  diff -u checked present
+}
+
+fixperms () {
+  dir=$1 owner=$2 fmode=$3 dmode=$4
+  ## Fix the directory tree DIR so that everything is owned by OWNER (a
+  ## USER:GROUP pair) and has modes FMODE for files and DMODE for
+  ## directories.
+
+  ## Change all of the ownerships.  This will prevent anyone else from
+  ## changing the permissions on the files.  This assumes that chown(1) is
+  ## secure in recursive mode; I've checked that GNU chown seems correct.
+  chown -R $owner "$dir"
+
+  ## Paranoia: check that we correctly changed all of the files.
+  u=${owner%:*} g=${owner#*:}
+  (cd "$dir"; find . ! \( -user $u -group $g \) -ls) |
+  if read line; then
+    moan "failed to fix permssions on \`$dir'"
+    { echo $line; cat; } | sed 's/^/	/'
+    exit 1
+  fi
+
+  ## Now get to work on the file and directory permissions.
+  find "$dir" -type d -print0 | xargs -0r chmod $dmode
+  find "$dir" ! -type d -print0 | xargs -0r chmod $fmode
+}
+
+commitdir () {
+  dir=$1 target=$2
+  ## Commit a `prepare' directory DIR, moving its `incoming' files to
+  ## TARGET.  This will choose the correct name for the directory, but
+  ## assumes that it's already correctly laid out.  We assume that the
+  ## permissions on this directory are safe (e.g., they've already been fixed
+  ## using `fixperms').  On successful exit, DIR won't exist any more.  The
+  ## shell variable `label' is set to the resulting archive name.
+
+  ## If there's no `incoming' directory, then there's nothing to do.  Just
+  ## zap the directory and move on.
+  if [ ! -d "$dir/incoming" ]; then
+    rm -rf "$dir"
+    return
+  fi
+
+  ## Find the datestamp and level numbers to use for this directory.  These
+  ## are created before the `incoming' directory, so they ought to exist.
+  read level date time tz <"$dir/meta"
+
+  ## Find a suitable sequence number for the target.  This is rather ugly;
+  ## sorry.
+  seq=1
+  while :; do
+    anyp=nil
+    for i in "$target"/"$date#$seq".*; do
+      if [ -e "$i" ]; then anyp=t; break; fi
+    done
+    case $anyp in nil) break ;; esac
+    seq=$(( $seq + 1 ))
+  done
+
+  ## Move the directory.
+  label="$date#$seq.$level"
+  mv "$dir/incoming" "$target/$label"
+  rm -rf "$dir"
+
+  ## Update the catalogue.  Replace an existing dump at the same level.
+  ## Assume that dates are monotonically increasing: add the new entry at the
+  ## end.  Each line has the form `LABEL LEVEL DATE TIME TZ'.
+  { ## Keep existing entries at other levels; `t' soaks up time and tz.
+    while read lab l d t; do
+      if [ $l -ne $level ]; then echo $lab $l $d $t; fi
+    done <"$target"/CATALOGUE
+    echo $label $level $date $time $tz
+  } >"$target"/CATALOGUE.new
+  mv "$target"/CATALOGUE.new "$target"/CATALOGUE
+}
+
+defcmd initmeta
+cmd_initmeta () {
+  case $# in 0) ;; *) usage_err ;; esac
+
+  ## Make a `new' directory and start recording our files.
+  cd $META
+  rm -rf new
+  mkdir -m755 new
+  f=""
+
+  ## Copy the blob from the existing metadata.
+  cp cur/blob new/
+  f="$f blob"
+
+  ## Archive the key recovery information.
+  cd $KEYS
+  tar cfz $META/new/keys.tgz pub/ recov/
+  f="$f keys.tgz"
+
+  ## Copy user and group information.
+  cd $META/new
+  for i in passwd group; do
+    grep -E '^(root|backup|bkp-[[:alnum:]]+):' /etc/$i >$i
+  done
+  f="$f passwd group"
+
+  ## Build the hashes file, and sign it.
+  chown root:root $f
+  chmod 644 $f
+  sha256sum $f >hashes
+  sign hashes
+
+  ## Replace the old metadata.
+  cd $META
+  mv cur old
+  mv new cur
+  rm -rf old
+}
+
+defcmd chkmeta
+cmd_chkmeta () {
+  case $# in 0) ;; *) usage_err ;; esac
+
+  checkdir pub/backup-auth.pub $META/cur
+}
+
+today () {
+  ## Report the current date, as ISO8601.  Allow an override.
+
+  case "${forceday+t}" in t) echo "$forceday" ;; *) date +%Y-%m-%d ;; esac
+}
+
+defucmd prep ASSET LEVEL \[DATE TIME TZ]
+cmd_prep () {
+  case $# in
+    2) set -- "$@" $(today) $(date +%H:%M:%S) $(date +%z) ;;
+    5) ;;
+    *) usage_err ;;
+  esac
+  asset=$1 level=$2 date=$3 time=$4 tz=$5
+  checkhost
+  checkword asset "$asset"
+  checknum level "$level"
+  checkthing date -0-9 date "$date"
+  checkthing time :0-9 time "$time"
+  checkthing timezone -+0-9 tz "$tz"
+
+  ## Make the host and asset directories if necessary.
+  cd $BKP
+  for i in $host $asset; do
+    if [ ! -d $i ]; then domkdir $i root:root 755; fi
+    cd $i
+  done
+  if [ ! -d failed ]; then domkdir failed root:root 755; fi
+  for i in . failed; do
+    if [ ! -f $i/CATALOGUE ]; then
+      touch $i/CATALOGUE
+      chown root:root $i/CATALOGUE
+      chmod 644 $i/CATALOGUE
+    fi
+  done
+
+  ## If an existing dump is in progress then archive it as a failure.
+  if [ -d prepare ]; then
+    if [ -d prepare/incoming ]; then
+      fixperms prepare/incoming root:root 640 755
+    fi
+    commitdir prepare failed/
+  fi
+
+  ## Make a new preparation directory.
+  domkdir prepare root:bkp-$host 755
+  echo $level $date $time $tz >prepare/meta
+  domkdir prepare/incoming bkp-$host:bkp-$host 2775
+
+  ## Print the directory name.
+  echo $BKP/$host/$asset/prepare/incoming
+}
+
+defucmd abort ASSET
+cmd_abort () {
+  case $# in 1) ;; *) usage_err ;; esac
+  asset=$1
+  checkhost
+  checkword asset "$asset"
+
+  ## Check that there's something to abort.
+  cd $BKP
+  if [ ! -d $host/$asset/prepare ]; then
+    die "no dump in progress for $host/$asset"
+  fi
+
+  ## Just throw it away.
+  rm -rf $host/$asset/prepare
+}
+
+defucmd fail ASSET
+cmd_fail () {
+  case $# in 1) ;; *) usage_err ;; esac
+  asset=$1
+  checkhost
+  checkword asset "$asset"
+
+  ## Check that there's something to fail.
+  cd $BKP
+  if [ ! -d $host/$asset/prepare ]; then
+    die "no dump in progress for $host/$asset"
+  fi
+
+  ## Archive the failure.  This shouldn't be used to determine dump levels or
+  ## we'll have gaps when things get sorted out.
+  cd $host/$asset
+  if [ -d prepare/incoming ]; then
+    fixperms prepare/incoming root:root 640 755
+  fi
+  commitdir prepare failed/
+}
+
+julian () {
+  date=$1
+  ## Convert an ISO8601 DATE to a Julian Day Number.
+
+  ## Extract the components of the date and trim one leading zero from each
+  ## (which would cause things to be interpreted as octal and fail).
+  year=${date%%-*} rest=${date#*-}; month=${rest%%-*} day=${rest#*-}
+  year=${year#0} month=${month#0} day=${day#0}
+
+  ## The actual calculation: convert a (proleptic) Gregorian calendar date
+  ## into a Julian day number.  This is taken from Wikipedia's page
+  ## http://en.wikipedia.org/wiki/Julian_day#Calculation but the commentary
+  ## is mine.  The epoch is 4713BC-01-01 (proleptic) Julian, or 4714BC-11-24
+  ## proleptic Gregorian.
+
+  ## If the MONTH is January or February then set a = 1, otherwise set a = 0.
+  a=$(( (14 - $month)/12 ))
+
+  ## Compute a year offset relative to 4801BC-03-01.  This puts the leap day
+  ## as the very last day in a year, which is very convenient.  The offset
+  ## here is sufficient to make all y values positive (within the range of
+  ## the JDN calendar), and is a multiple of 400, which is the Gregorian
+  ## cycle length.
+  y=$(( $year + 4800 - $a ))
+
+  ## Compute the offset month number in that year.  These months count from
+  ## zero, not one.
+  m=$(( $month + 12*$a - 3 ))
+
+  ## Now for the main event.  The (153 m + 2)/5 term is a surprising but
+  ## correct trick for obtaining the number of days in the first m months of
+  ## the (shifted) year.  The magic offset 32045 is what you get when you
+  ## plug the proper JDN epoch (year = -4713, month = 11, day = 24) into the
+  ## above machinery.
+  jdn=$(( $day + (153*$m + 2)/5 + 365*$y + $y/4 - $y/100 + $y/400 - 32045 ))
+
+  echo $jdn
+}
+
+dumplevel () {
+  fulldate=$1 lastdate=$2
+  ## Return the dump level, given that the most recent full dump occurred on
+  ## FULLDATE and the most recent dump of any kind occurred on LASTDATE.
+
+  ## Actually, we're much more interested in the day difference between these
+  ## two times.
+  fulljdn=$(julian $fulldate)
+  lastjdn=$(julian $lastdate)
+  now=$(today); nowjdn=$(julian $now)
+  lastday=$(( $lastjdn - $fulljdn ))
+  today=$(( $nowjdn - $fulljdn ))
+
+  ## If the difference is greater than 512 then we know we should do a full
+  ## dump.  (This provides an upper bound for the search below.  It should
+  ## never happen in practice, of course.)
+  if [ $(( $today - $lastday )) -ge 512 ]; then echo 0; return; fi
+
+  ## Now we work out the correct dump level.  This will assume that the
+  ## previous dump had a sensible level.  If dumps are omitted, then we will
+  ## choose a lower (more comprehensive) dump level than the schedule calls
+  ## for; such an overestimation will mean that we will probably end up
+  ## dumping too much again.  This is the right error to make.
+  ##
+  ## We use a Towers of Hanoi schedule.  If we're doing dumps every day, then
+  ## on day n since the last full dump, we work out the dump level as
+  ## follows: write n = 2^s t where t is odd (i.e., s is the number of
+  ## trailing zero bits in the binary representation of n); then the dump
+  ## level on day n is 9 - s.  This is enough for 512 days without a full
+  ## dump, and it fails gracefully anyway.
+  ##
+  ## Now we have to deal with the problem of skipping dumps.  Suppose the
+  ## last dump was on day m = 2^u v, and it's now day n = 2^s t.  We ought to
+  ## take the lowest dump level of any intervening day, i.e., the dump level
+  ## is 9 - a for the largest a such that there exists b with m < l = 2^a b
+  ## <= n.  We claim that such an l is unique.  Suppose, to the contrary,
+  ## that m < 2^a b < 2^a b' <= n, with both b and b' odd.  Then m < 2^{a+1}
+  ## (b + 1)/2 <= n, contradicting maximality of a.
+  ##
+  ## How does this help?  Observe that n = 2^s t = 2^a b + o, for some o <
+  ## 2^a: if o >= 2^a then 2^a (b + 1) <= n contradicting uniqueness of l.
+  ## Similarly, m = 2^u v = 2^a b - r, for some r <= 2^a (otherwise m <
+  ## 2^a (b - 1), again contradicting uniqueness).  Therefore, m and n are
+  ## identical from bit a + 1 onwards, and differ at bit a.  In other words,
+  ## a is the position of the most significant set bit in m XOR n.
+  diff=$(( lastday ^ today ))
+
+  ## We know that the bit position must be less than 16.
+  t=16 n=0
+  while [ $diff -gt 1 ]; do
+    xx=$(( $diff >> $t ))
+    if [ $xx -gt 0 ]; then
+      diff=$xx n=$(( $n + $t ))
+    fi
+    t=$(( $t >> 1 ))
+  done
+  ## Clamp: days differing above bit 9 (e.g., 1023 and 1024) mean level 0.
+  lev=$(( 9 - $n )); case $lev in -*) lev=0 ;; esac; echo $lev
+}
+
+defucmd level ASSET
+cmd_level () {
+  case $# in 1) ;; *) usage_err ;; esac
+  asset=$1
+  checkhost
+  checkword asset "$asset"
+
+  ## Set the correct directory.  If it doesn't exist then we obviously need a
+  ## level-0 dump.
+  cd $BKP
+  full="0 1970-01-01 00:00:00 +0000"
+  if [ ! -d $host/$asset ]; then echo $full; return; fi
+  cd $host/$asset
+
+  ## We need the time of the most recent dump of any kind, and the most
+  ## recent level-zero dump.
+  fulldate=none lastdate=none
+  while read label level date time tz; do
+    if [ $level -eq 0 ]; then fulldate=$date; fi
+    lastdate=$date
+  done <CATALOGUE
+  case $fulldate in none) echo $full; return ;; esac
+  level=$(dumplevel $fulldate $lastdate)
+
+  ## Determine the time of the most recent dump of the same or more inclusive
+  ## level.
+  date=none
+  while read lab l d t; do
+    if [ $l -le $level ]; then date=$d time=$t; fi
+  done <CATALOGUE
+  echo $level $date $time $tz
+}
+
+defucmd hash ASSET FILE HASH
+cmd_hash () {
+  case $# in 3) ;; *) usage_err ;; esac
+  asset=$1 file=$2 hash=$3
+  checkhost
+  checkword asset "$asset"
+  checkpath file "$file"
+  checkword hash "$hash"
+  ## Record HASH for FILE in the `hashes' file of the dump in progress.
+  cd $BKP/$host/$asset/prepare
+  if [ -f hashes ]; then
+    while read h f; do
+      case "$f" in "$file") die "file \`$file' already hashed" ;; esac
+    done <hashes
+    cp hashes hashes.new
+  else rm -f hashes.new; fi
+  echo "$hash  $file" >>hashes.new
+  mv hashes.new hashes
+}
+
+defucmd commit ASSET
+cmd_commit () {
+  case $# in 1) ;; *) usage_err ;; esac
+  asset=$1
+  checkhost
+  checkword asset "$asset"
+
+  cd $BKP/$host/$asset/prepare
+  fixperms incoming root:bkp-$host 640 755
+  findargs=""
+
+  if [ -f hashes ]; then
+    while read hash name; do
+      if [ ! -f "incoming/$name" ]; then
+	die "precomputed hash for nonexistent or non-file \`$name'"
+      fi
+      findargs="$findargs ! -path incoming/$name"
+    done <hashes
+    cp hashes hashes.calc
+  fi
+
+  find incoming -type f $findargs -print0 | \
+    xargs -0r sha256sum | \
+    sed 's:  incoming/:  :' \
+    >>hashes.calc
+  sort -k2 hashes.calc >incoming/hashes
+  sign incoming/hashes
+  chmod 640 incoming/hashes incoming/hashes.sig
+  chown root:bkp-$host incoming/hashes incoming/hashes.sig
+
+  cd ..
+  commitdir prepare .
+  echo "$label"
+}
+
+defucmd check ASSET LABEL
+cmd_check () {
+  case $# in 2) ;; *) usage_err ;; esac
+  asset=$1 label=$2
+  checkhost
+  checkword asset "$asset"
+  checkword label "$label"
+
+  checkdir pub/backup-auth.pub $BKP/$host/$asset/$label
+}
+
+defucmd catalogue ASSET
+cmd_catalogue () {
+  case $# in 1) ;; *) usage_err ;; esac
+  asset=$1
+  checkhost
+  checkword asset "$asset"
+
+  cat $BKP/$host/$asset/CATALOGUE
+}
+
+defucmd outdated ASSET
+cmd_outdated () {
+  case $# in 1) ;; *) usage_err ;; esac
+  asset=$1
+  checkhost
+  checkword asset "$asset"
+  ## List dumps superseded by a newer dump at a lower (fuller) level.
+  cd $BKP/$host/$asset
+  for i in [0-9]*#*.*; do
+    if [ -d "$i" ]; then echo "$i"; fi
+  done |
+  sort -t'#' -k1,1r -k2,2nr |
+  { best=10
+    while read tag; do
+      level=${tag##*.}
+      if [ $level -le $best ]
+      then best=$level
+      else echo "$tag"
+      fi
+    done
+  }
+}
+
+###--------------------------------------------------------------------------
+### Main program.
+
+defcmd test CMD '[ARGS ...]'
+cmd_test () { "$@"; }
+
+case $uservp in
+  t)
+    host=${USERV_USER#bkp-}
+    opts="h"
+    ;;
+  nil)
+    unset host
+    opts="hH:D:"
+    ;;
+esac
+
+while getopts "$opts" opt; do
+  case "$opt" in
+    h) cmd_help; exit ;;
+    H) host=$OPTARG ;;
+    D) forceday=$OPTARG ;;
+    *) usage_err ;;
+  esac
+done
+shift $(( $OPTIND - 1 ))
+
+case $# in 0) usage_err ;; esac
+lookupcmd "$1"; shift
+cmd_$cmdname "$@"
+
+###----- That's all, folks --------------------------------------------------
diff --git a/bkpfunc.sh b/bkpfunc.sh
new file mode 100755
index 0000000..99fbbac
--- /dev/null
+++ b/bkpfunc.sh
@@ -0,0 +1,99 @@
+### -*-sh-*-
+###
+### Functions for backup clients.
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+set -e
+
+###--------------------------------------------------------------------------
+### Utilities.
+
+QUIS=${0##*/}
+fail () { echo >&2 "$QUIS: $*"; exit 1; }
+
+preflight () {
+  for i in TMPDIR HOST RMT LEVEL LASTDATE ASSET TARGET; do
+    eval "p=\${BKP_$i+t}"
+    case "$p" in t) ;; *) fail "environment not correctly configured" ;; esac
+  done
+}
+
+run () {
+  echo "$*"
+  "$@"
+}
+
+bkpadmin () {
+  ## bkpadmin COMMAND ARGUMENT ...
+  ssh $BKP_HOST userv root bkpadmin "$@"
+}
+
+datefmt () {
+  fmt=$1 date=$2
+  ## Convert the DATE (in ISO8601-ish format, with an optional numeric
+  ## timezone offset) according to the strftime(3) format FMT.
+
+  ## We have Perl available, so you'd think this would be fairly easy.  Alas,
+  ## not.  The obvious thing to do would be use the Date::Format module, but
+  ## that's unusably broken.  Consider:
+  ##
+  ##   $t0 = 1319934600; $t1 = $t0 + 3600;	# obviously different
+  ##   for my $i ($t0, $t1)			# print identically
+  ##     { print time2str "%a %b %e %H:%M:%S %Y %z\n", $i; }
+  ##
+  ## The Date::Parse module seems to work correctly, but isn't installed by
+  ## default on some of our target platforms.  So we end up doing a lot of
+  ## the work by hand.
+
+  perl -e '
+    use POSIX;
+
+    my ($fmt, $date) = @ARGV;
+
+    ## Parse the input date.
+    my ($yr, $mo, $dy, $hr, $mi, $s, $tz) = $date =~
+      /^\s*(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)(?:\s+)?([-+]\d+)?\s*$/
+      or die "bad input date `$date'\''";
+
+    ## Convert the input date into a time_t.  This is annoyingly fiddly.  If
+    ## an explicit timezone offset is supplied, do the conversion as UTC, and
+    ## then apply the timezone correction.  This means that we must hack
+    ## about with the awful BCD timezone offset.
+    my $t;
+    if (!defined $tz) {
+      $t = mktime $s, $mi, $hr, $dy, $mo - 1, $yr - 1900, undef, undef, -1;
+    } else {
+      use integer;
+      my ($tzsign, $tzabs) = $tz < 0 ? (-1, -$tz) : (+1, $tz);
+      my $tzoff = $tzsign*(60*($tzabs/100) + ($tzabs%100));
+      local $ENV{TZ} = "UTC0"; tzset;
+      $t = mktime $s, $mi, $hr, $dy, $mo - 1, $yr - 1900, undef, undef, 0;
+      $t -= 60*$tzoff;
+    }
+
+    ## Now format this as requested.
+    tzset; my @tm = localtime $t;
+    print strftime $fmt, @tm;
+    print "\n";
+  ' "$fmt" "$date"
+}
+
+###----- That's all, folks --------------------------------------------------
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..66d3dfc
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,57 @@
+dnl -*-autoconf-*-
+dnl
+dnl Configure script for distorted.org.uk backup software
+dnl
+dnl (c) 2011 Mark Wooding
+dnl
+
+dnl----- Licensing notice ---------------------------------------------------
+dnl
+dnl This program is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program; if not, write to the Free Software Foundation,
+dnl Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+dnl--------------------------------------------------------------------------
+dnl Initialization.
+
+mdw_AUTO_VERSION
+AC_INIT([distorted-backup], AUTO_VERSION, [mdw@distorted.org.uk])
+AC_CONFIG_SRCDIR([bkpadmin.in])
+AC_CONFIG_AUX_DIR([config])
+AM_INIT_AUTOMAKE([foreign])
+mdw_SILENT_RULES
+
+AC_PROG_CC
+
+dnl--------------------------------------------------------------------------
+dnl C programming environment.
+
+PKG_CHECK_MODULES([mLib], [mLib >= 2.1.0])
+PKG_CHECK_MODULES([nettle], [nettle >= 2.4])
+
+dnl--------------------------------------------------------------------------
+dnl Perl programming environment.
+
+AC_ARG_VAR([PERL], [Path to your favourite Perl binary.])
+AC_PATH_PROGS([PERL], [perl perl5], [false])
+AX_PROG_PERL_VERSION([5.10],,
+		     [AC_MSG_ERROR([Failed to find suitable Perl.])])
+
+dnl--------------------------------------------------------------------------
+dnl Output.
+
+AC_CONFIG_FILES(
+  [Makefile])
+AC_OUTPUT
+
+dnl----- That's all, folks --------------------------------------------------
diff --git a/lvm-rmsnap.8 b/lvm-rmsnap.8
new file mode 100644
index 0000000..19fdb2d
--- /dev/null
+++ b/lvm-rmsnap.8
@@ -0,0 +1,124 @@
+.ie t .ds o \(bu
+.el .ds o o
+.de hP
+.IP
+\h'-\w'\fB\\$1\ \fP'u'\fB\\$1\ \fP\c
+..
+.TH lvm-rmsnap 8 "6 November 2011" "distorted.org.uk backup"
+.SH NAME
+lvm-rmsnap \- remove an LVM snapshot, despite LVM bugs
+.SH SYNOPSIS
+.B lvm-rmsnap
+.RB [ \-dn ]
+.IB vgname / lvname
+.SH DESCRIPTION
+The
+.B lvm-rmsnap
+tool removes an LVM snapshot volume.  The command
+.IP
+.BI "lvm-rmsnap " vgname / lvname
+.PP
+does what you'd hope, in an ideal world,
+.IP
+.BI "lvremove \-f " vgname / lvname
+.PP
+would do.  (Yes,
+.B lvm-rmsnap
+doesn't prompt if the snapshot is active.  There are LVM bugs which
+prevent you from deactivating the snapshot, and this tool is intended to
+be used in noninteractive scripts.)
+.PP
+Unfortunately, the world is not ideal, and trying to remove snapshots
+using current versions of LVM runs into a collection of nasty race
+conditions with
+.BR udev (7).
+The purpose of
+.B lvm-rmsnap
+is to remove snapshots despite these bugs.  It acts as a `nanny' for
+.BR lvremove ,
+carefully tracking all of the toys, and putting them back in the pram.
+.SS "Command line"
+The following command-line options are accepted.
+.TP
+.B "\-h, \-\-help"
+Print a help message to standard output and exit with status zero.
+.TP
+.B "\-v, \-\-version"
+Print the program's version number to standard output and exit with
+status zero.
+.TP
+.B "\-d, \-\-debug"
+Print to standard error information about what the program is doing and
+what things it's found out.  This may be useful if you're trying to find
+out why
+.B lvm-rmsnap
+is misbehaving.
+.TP
+.B "\-n, \-\-noact"
+Don't actually perform corrective actions.  This is pretty useless
+without
+.BR \-d .
+You probably don't want to use this unless you're testing
+.BR lvm-rmsnap .
+.SS "Operation"
+The basic problem with
+.BR lvremove (8)
+is that, partway through, it discovers that some device it was about to
+fiddle with is currently in use by some other process \(en usually
+invoked by a
+.BR udev (7)
+rule \(en and then gives up, leaving things in a messed-up state.
+There's an elaborate and wobbly synchronization protocol which involves
+passing System V semaphore set ids through the kernel and is meant to
+make stuff like this not go wrong, but it does anyway.
+.PP
+The job of
+.B lvm-rmsnap
+is to remove a snapshot despite these bugs.  Usually, repeating the
+.B lvremove
+attempt will succeed, though there's often debris of various kinds to be
+cleared away.  Here's a list of the things that
+.B lvm-rmsnap
+tries to do.
+.hP \*o
+If you're very unlucky, then LVM will leave the snapshot origin volume
+suspended, which will cause a subsequent
+.B lvremove
+attempt to wedge itself (and block any other processes trying to do I/O
+to that volume).  So
+.B lvm-rmsnap
+resumes the volume before retrying.
+.hP \*o
+A failed
+.B lvremove
+can leak `cookies', which are really System V semaphore sets.  These use
+up kernel memory, and can't be automatically garbage-collected (this is
+a well-known mistake in the System V IPC design).  So
+.B lvm-rmsnap
+keeps track of the cookies used, and releases them if
+.B lvremove
+failed to do so.
+.hP \*o
+Setting up a snapshot involves a little juggle: a new 
+.RB ` real '
+device is created, exactly like the origin volume; and then the origin
+volume is changed to be a
+.B snapshot-origin
+volume pointing at the new device.  When
+.B lvremove
+fails, the
+.RB ` real '
+device can be left behind.  So
+.B lvm-rmsnap
+detects this situation and removes it, after checking that it really
+isn't needed for anything.
+.SH BUGS
+If you know of an LVM snapshot-removal bug which this won't work around
+then please let the author know.
+.SH SEE ALSO
+.BR dmsetup (8),
+.BR lvm (8),
+.BR lvremove (8),
+.BR udev (7).
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
diff --git a/lvm-rmsnap.in b/lvm-rmsnap.in
new file mode 100755
index 0000000..0a46166
--- /dev/null
+++ b/lvm-rmsnap.in
@@ -0,0 +1,462 @@
+#! @PERL@
+### -*-perl-*-
+###
+### Remove an LVM snapshot, without falling foul of LVM bugs
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+use Cwd qw(realpath);
+use Errno qw(:POSIX);
+use Fcntl qw(:mode);
+use File::stat;
+use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
+use IO::Handle;
+use Time::HiRes qw(time);
+
+our $VERSION = "@VERSION@";
+
+###--------------------------------------------------------------------------
+### Utilities.
+
+## Reporting: whine always; burble only if $DEBUG; fail whines and exits.
+(our $QUIS = $0) =~ s:^.*/::;
+our $DEBUG = 0;
+sub whine ($) { print STDERR "$QUIS: $_[0]\n"; }
+sub burble ($) { $DEBUG and whine $_[0]; }
+sub fail ($) { whine $_[0]; exit $! || ($? >> 8) || 255; }
+
+## Cleanups.  `cleanup BLOCK' queues BLOCK to be run when the program
+## finishes; the most recently queued cleanup runs first.
+our @CLEANUP = ();
+sub runcleanups { foreach my $thunk (@CLEANUP) { &$thunk } }
+END { runcleanups; }
+$SIG{INT} = $SIG{TERM} = sub {
+  my ($signame) = @_;
+  runcleanups;
+  $SIG{$signame} = 'DEFAULT';
+  kill $signame => $$;
+};
+sub cleanup (&) { my ($thunk) = @_; unshift @CLEANUP, $thunk; }
+
+sub fixint ($) { my $x = shift; return oct $x if $x =~ /^0/; $x + 0; }
+
+###--------------------------------------------------------------------------
+### Device fiddling.
+
+sub devsys ($) {
+  ## devsys DEV
+  ##
+  ## Return a sysfs path (without its `/sys' prefix) for a device DEV.
+
+  my ($dev) = @_;
+  my $st = stat $dev or fail "stat ($dev): $!";
+  my $kind = S_ISBLK($st->mode) ? "block" : S_ISCHR($st->mode) ? "char" :
+    fail "$dev is not a device";
+  ## Linux device numbers: 12-bit major; 20-bit minor split either side.
+  my ($maj, $min) = (($st->rdev >> 8) & 0xfff,
+		     ($st->rdev & 0xff) | (($st->rdev >> 12) & 0xfff00));
+  (my $whole = realpath "/sys/dev/$kind/$maj:$min") =~ s:^/sys/:/:;
+  return $whole;
+}
+
+our %DMTAB = ();		# device name => listref of table lines
+
+sub dmtable_update () {
+  ## dmtable_update
+  ##
+  ## Discard %DMTAB and rebuild it from the output of `dmsetup table'.
+
+  burble "re-read device-mapper table";
+  %DMTAB = ();
+  open my $dt, "-|", "dmsetup", "table" or fail "open (dm table): $!";
+  while (my $line = $dt->getline) {
+    my ($dev, $rest) = split /[:\s]+/, $line, 2;	# name, then the rest
+    push @{$DMTAB{$dev}}, [split ' ', $rest];	# one field list per line
+  }
+  close $dt or fail "dmsetup table failed (rc = $?)";
+}
+
+sub dmname ($) {
+  ## dmname SYSPATH
+  ##
+  ## Read and return the device-mapper name of the node at sysfs SYSPATH.
+
+  my $syspath = shift;
+  my $file = "/sys$syspath/dm/name";
+  open my $fh, "<", $file or fail "open ($syspath/dm/name): $!";
+  chomp (my $dmname = $fh->getline); close $fh;
+  return $dmname;
+}
+
+###--------------------------------------------------------------------------
+### I/O utilities.
+
+sub sel ($;$$$) {
+  ## sel TIMEOUT, [READS, WRITES, EXCEPTIONS]
+  ##
+  ## Wait for at most TIMEOUT seconds (indefinitely if TIMEOUT is `undef').
+  ## Each of READS, WRITES and EXCEPTIONS is a listref of FILE => SUB pairs:
+  ## if FILE is readable (writable, has an exceptional condition) then SUB
+  ## is invoked.  Fails if select(2) itself reports an error.
+
+  my ($t, $r, $w, $x) = @_;
+  my ($vr, $vw, $vx);
+  my (%r, %w, %x);
+
+  ## Read the arguments and build a data structure.
+  for my $i ([$r, \$vr, \%r], [$w, \$vw, \%w], [$x, \$vx, \%x]) {
+    my ($a, $v, $h) = @$i;
+    next unless $a;
+    my @a = @$a;
+    while (@a) {
+      my ($f, $g) = splice @a, 0, 2;
+      my $fd = $f->fileno;
+      $h->{$fd} = $g;
+      vec($$v, $fd, 1) = 1;
+    }
+  }
+
+  ## Do the wait (errors come back as -1, not undef) and sift the results.
+  select($vr, $vw, $vx, $t) >= 0 or fail "select: $!";
+  for my $i ([$vr, \%r], [$vw, \%w], [$vx, \%x]) {
+    my ($v, $h) = @$i;
+    while (my ($f, $g) = each %$h) {
+      if (vec $v, $f, 1) { &$g; }
+    }
+  }
+}
+
+sub doread ($;$) {
+  ## doread FILE, [LEN]
+  ##
+  ## Read up to LEN bytes (or a default amount) from FILE.  At end of file,
+  ## return undef.  If reading would block then return an empty string.
+  ## Otherwise return the data which was read.
+
+  my ($fh, $len) = @_;
+  my $got = sysread $fh, my $chunk, $len // 4096;
+  return $got ? $chunk : undef if defined $got;
+  return "" if $! == EAGAIN;
+  fail "read: $!";
+}
+
+sub run ($$@) {
+  ## run WHAT, PROG, ARGS...
+  ##
+  ## Run PROG with ARGS; WHAT labels the failure report if it goes wrong.
+
+  my ($what, @cmd) = @_;
+  system(@cmd) == 0 or fail "$cmd[0] ($what) failed (rc = $?)";
+}
+
+sub capture ($@) {
+  ## capture PROG, ARGS...
+  ##
+  ## Run PROG, passing it ARGS.  Returns the raw wait status (`$?'), and the
+  ## program's stdout and stderr as strings.
+
+  my ($prog, @args) = @_;
+  my ($out, $err) = ("", "");
+  my ($outpipe_in, $outpipe_out, $errpipe_in, $errpipe_out);
+  pipe $outpipe_in, $outpipe_out or fail "pipe ($prog out): $!";
+  pipe $errpipe_in, $errpipe_out or fail "pipe ($prog err): $!";
+  defined (my $kid = fork) or fail "fork ($prog): $!";
+  if ($kid == 0) {
+    @CLEANUP = ();			# the cleanups belong to the parent
+    close $outpipe_in and close $errpipe_in
+      and open STDOUT, ">&", $outpipe_out
+      and open STDERR, ">&", $errpipe_out
+      and exec $prog, @args
+      or fail "exec $prog: $!";
+  }
+  close $outpipe_out;
+  close $errpipe_out;
+  for (;;) {
+    my @r = ();
+    for my $i ([\$outpipe_in, \$out, "out"],
+	       [\$errpipe_in, \$err, "err"]) {
+      my ($p, $b, $w) = @$i;
+      push @r, $$p => sub {
+	my $buf = doread $$p;
+	if (defined $buf) { $$b .= $buf; }
+	else { close $$p; $$p = undef; }
+      } if $$p;
+    }
+    last unless @r;
+    sel undef, \@r;
+  }
+  waitpid($kid, 0) == $kid or fail "waitpid ($prog): $!";
+  return $?, $out, $err;
+}
+
+###--------------------------------------------------------------------------
+### Monitoring udev events.
+
+sub umon_create (@) {
+  ## umon_create ARGS...
+  ##
+  ## Create a udev monitor, with the given `udevadm monitor' arguments, and
+  ## return an object.  We always select only kernel events.  We try to wait
+  ## for the monitor to start up before returning.  Don't trust this: use
+  ## `umon_sync' anyway.
+
+  my @args = @_;
+  my $u = {};
+
+  ## Start the monitor process.
+  $u->{KID} = open($u->{PIPE}, "-|",
+		   "stdbuf", "-o0",
+		   "udevadm", "monitor", "--kernel", "--property", @args)
+    or fail "open (umon): $!";
+  cleanup { kill 9, $u->{KID} };
+  $u->{PIPE}->blocking(0) or fail "set non-blocking (umon): $!";
+
+  ## Wait for the end of the preamble, indicated by the first blank line:
+  ## from observation with strace(1), the monitor has by then attached itself
+  ## to its netlink socket and is ready to fetch events.  Whatever follows
+  ## the blank line (possibly several lines) is the start of the event data.
+  my $ok = 0;
+  my $buf = "";
+  my $now = time;
+  my $end = $now + 5;
+  while (!$ok) {
+    sel
+      $end - $now,
+      [ $u->{PIPE} => sub {
+	  defined (my $b = doread $u->{PIPE}) or fail "read (umon): eof";
+	  $buf .= $b;
+	  if ($buf =~ /\n\n(.*)\z/s) { $ok = 1; $buf = $1; }
+	}
+      ];
+    $now = time;
+    if (!$ok && $now >= $end) { fail "umon timeout"; }
+  }
+  $u->{BUF} = $buf;
+
+  ## Done.
+  return $u;
+}
+
+sub umon_read ($) {
+  ## umon_read UMON
+  ##
+  ## Read the complete events currently available from UMON, as a list of
+  ## hash references mapping properties to their values.
+
+  my ($u) = @_;
+  my @s = ();
+  for (;;) {
+    defined (my $buf = doread $u->{PIPE}) or fail "read (umon): end of file";
+    $buf eq "" and last;		# would block: nothing more for now
+    $buf = $u->{BUF} . $buf;		# prepend the saved partial event
+    my @r = split /\n\n/, $buf, -1;	# blank lines separate events
+    $u->{BUF} = pop @r;			# last piece isn't terminated yet
+    for my $r (@r) {
+      push @s, { map { /^(\w+)=(.*)$/ } split /\n/, $r };
+    }
+  }
+  return @s;
+}
+
+sub umon_sync ($$) {
+  ## umon_sync UMON, DEV
+  ##
+  ## Wait for UMON to report an event about the device DEV (without its
+  ## `/dev/' prefix), triggering periodically just in case it missed one.
+  ## This is useful for synchronizing.  Returns every event read along the
+  ## way, apart from the first one which named DEV.
+
+  my ($u, $dev) = @_;
+  my $now = time;
+  my $retry = 0;			# next trigger time; 0 forces one now
+  my $done = 0;
+  my @ev = ();				# events to hand back to the caller
+  burble "sync with udev";
+
+  until ($done) {
+
+    ## Too late.  Trigger a change event and try again.
+    if ($now >= $retry) {
+      $retry = $now + 2;
+      run "trigger $dev", "udevadm", "trigger", "--sysname-match=$dev";
+    }
+
+    ## Now read events and see what happens.
+    sel
+      $retry - $now,
+      [ $u->{PIPE} => sub {
+	  my @e = umon_read $u;
+	  while (@e) {
+	    my $e = shift @e;
+	    if ($e->{DEVNAME} eq $dev) { $done = 1; push @ev, @e; last; }
+	    else { push @ev, $e; }
+	  }
+	}
+      ];
+    $now = time;
+  }
+
+  return @ev;
+}
+
+###--------------------------------------------------------------------------
+### Main code.
+
+## Parse the command line: options, then exactly one VGNAME/LVNAME argument.
+our $USAGE = "usage: $QUIS VGNAME/LVNAME";
+sub version { print "$QUIS, version $VERSION\n"; }
+sub help {
+  print <<EOF;
+$USAGE
+
+Options:
+  -h, --help		Show this help text.
+  -v, --version		Show the program version number.
+  -d, --debug		Show debugging information.
+  -n, --no-act		Don't take corrective actions.
+EOF
+}
+
+our $NOACT = 0;
+GetOptions('help|h|?'		=> sub { version; help; exit; },
+	   'version|v'		=> sub { version; exit; },
+	   'debug|d'		=> \$DEBUG,
+	   'no-act|noact|n'	=> \$NOACT)	# `--noact' kept for compatibility
+  and @ARGV == 1
+  and $ARGV[0] =~ m:(.+)/(.+):
+  or do { print STDERR $USAGE, "\n"; exit 1; };
+our ($VG, $LV) = ($1, $2);
+
+## Check that the volume in question actually exists, and is a device-mapper
+## device, before we wheel out the big guns.
+dmtable_update;
+our $SYS = devsys "/dev/$VG/$LV";	# sysfs path, minus the `/sys' prefix
+burble "sysfs name is $SYS";
+my $t = $DMTAB{dmname $SYS}		# all the table lines for this device
+  or fail "/dev/$VG/$LV isn't a device-mapper device";
+if ($DEBUG) {
+  burble "found table...";
+  burble "\t" . join " ", @$_ foreach @$t;
+}
+$t->[0][2] eq "snapshot" or fail "/dev/$VG/$LV isn't a snapshot";
+
+## Create a udev monitor.  We're only interested in disk-shaped block
+## devices.  (If we use some other device kind for synchronization then this
+## filter will have to be broadened.)
+my $u = umon_create "--subsystem-match=block/disk";
+
+## Prepare for the awful synchronization hack.  We need to make sure, below,
+## that we've read all of the interesting events resulting from an `lvremove'
+## call.  To do this, we wait for an event on a different device -- but we
+## must avoid being fooled by spurious events on this device.  As an attempt
+## to minimize the probability of this going wrong, acquire a pet device
+## which nobody else is using.  The best idea seems to be a loopback device.
+open my $lopipe, "-|", "losetup", "--show", "--find", "/etc/motd"
+  or fail "open (losetup attach): $!";
+chomp (my $lo = $lopipe->getline);
+{ local $/ = undef; <$lopipe>; }
+$lo =~ s:^/dev/::;
+$lopipe->close or fail "losetup attach failed (rc = $?)";
+cleanup { system "losetup", "--detach", "/dev/$lo" };
+
+## Initial synchronization, to make sure stuff works.
+umon_sync $u, $lo;
+
+## Try to remove the snapshot.  Capture stdout and stderr, and relay them if
+## nothing serious went wrong (anything other than exit status 5).
+burble "initial attempt to remove snapshot";
+my ($rc, $out, $err) = capture "lvremove", "--force", "$VG/$LV";
+if ($rc != 0x500) {
+  print STDOUT $out;
+  print STDERR $err;
+  burble "lvremove didn't explode (rc = $rc): we're done here";
+  if ($rc >> 8) { $rc >>= 8 }
+  elsif ($rc & 255) { $rc = 128 + ($rc & 127) }	# drop the core-dump flag
+  exit $rc;
+}
+burble "initial lvremove failed";
+
+## OK, stuff went wrong.  First see if there was a udev cookie left over, and
+## if so try to release it.  It's important to know that we've read all of
+## the relevant uevents, so synchronize again.
+my @e = umon_sync $u, $lo;
+my %c = ();
+for my $e (@e) {
+  $c{($e->{DM_COOKIE} & 0xffff) | 0xd4d0000} = 1
+    if $e->{DEVPATH} eq $SYS && exists $e->{DM_COOKIE};
+}
+burble "cookies used: " . join ", ", map { sprintf "0x%x", $_ } keys %c;
+
+## Find the used cookies which are still extant, and release them.
+open my $uc, "-|", "dmsetup", "udevcookies" or fail "open (cookies): $!";
+$uc->getline;				# discard the first (heading) line
+my @leak = ();
+while (my $l = $uc->getline) {
+  my @f = split ' ', $l;
+  push @leak, $f[0] if $c{fixint $f[0]};
+}
+close $uc or fail "udevcookies failed (rc = $?)";
+for my $c (@leak) {
+  burble "release leaked cookie $c";
+  run "release cookie", "dmsetup", "udevreleasecookie", $c unless $NOACT;
+}
+
+## If we're very unlucky, the origin volume may still be suspended.  Resume
+## it now, or the next attempt will get stuck.  (Resuming is idempotent, so
+## we don't need to check whether it's already running.)  Finding the origin
+## is annoying: search the device-mapper table for a device with a
+## `snapshot-origin' table referencing the same backing store as the
+## snapshot.  (%DMTAB still dates from before the failed `lvremove'.)
+my $back = $DMTAB{dmname $SYS}[0][3];	# first argument of snapshot target
+my $orig = undef;
+burble "backend device $back";
+for my $dm (keys %DMTAB) {
+  my $t = $DMTAB{$dm};
+  next unless @$t == 1 &&		# only single-line tables qualify
+    $t->[0][2] eq "snapshot-origin" &&
+    $t->[0][3] eq $back;
+  defined $orig and fail "snapshot appears to have multiple origins";
+  $orig = $dm;
+}
+defined $orig or fail "couldn't find snapshot origin device";
+burble "found origin volume $orig; resuming...";
+run "resume origin $orig", "dmsetup", "resume", $orig unless $NOACT;
+
+## See whether removing the snapshot again helps any.
+burble "retry snapshot removal";
+run "retry", "lvremove", "--force", "$VG/$LV" unless $NOACT;
+
+## OK, we're on the way to recovery.  The origin device may now be not a
+## snapshot-origin any more.  Refresh the device-mapper table and inspect it.
+dmtable_update;
+if (-d "/sys/dev/block/$back") {	# presumably $back is MAJ:MIN -- confirm
+  my $backdm = dmname "/dev/block/$back";	# dmname prepends `/sys' itself
+  if ($DMTAB{$orig}[0][2] ne "snapshot-origin") {
+    burble "origin released but backend $backdm still exists: remove";
+    run "remove backend $backdm", "dmsetup", "remove", $backdm
+      unless $NOACT;
+  }
+}
+
+## All done.  There, that wasn't so bad, was it?
+burble "completed successfully";
+exit 0;
+
+###----- That's all, folks --------------------------------------------------
diff --git a/rfreezefs.8 b/rfreezefs.8
new file mode 100644
index 0000000..8cfd948
--- /dev/null
+++ b/rfreezefs.8
@@ -0,0 +1,317 @@
+.TH rfreezefs 8 "October 2011" "distorted.org.uk backup"
+.SH NAME
+rfreezefs \- freeze a filesystem safely
+.SH SYNOPSIS
+.B rfreezefs
+.RB [ \-n ]
+.RB [ \-a
+.IR address ]
+.RB [ \-p
+.IR loport [\fB\- hiport ]]
+.I filesystem
+\&...
+.SH DESCRIPTION
+The
+.B rfreezefs
+program freezes one or more mounted filesystems for a period of time,
+and then thaws them.  For more detail on what this means, why you'd want
+to, and how you might go about using
+.B rfreezefs
+to do it, see below.
+.PP
+The following command-line options are recognized.
+.TP
+.B "\-h, \-\-help"
+Writes a help message to standard output, and exits with status 0.
+.TP
+.B "\-v, \-\-version"
+Writes the version number to standard output, and exits with status 0.
+.TP
+.B "\-u, \-\-usage"
+Writes a command-line usage synopsis to standard output, and exits with
+status 0.
+.TP
+.BI "\-a, \-\-address=" address
+Listen only for incoming connections to the given
+.IR address .
+The default is to listen for connections to any local address.
+.TP
+.B "\-n, \-\-not-really"
+Don't actually freeze or thaw any filesystems; instead, write messages
+to standard error explaining what would be done.
+.TP
+.BI "\-p, \-\-port-range=" loport\fR[ \- hiport \fR]
+Listen for incoming connections on a port between
+.I loport
+and
+.IR hiport .
+If
+.I hiport
+is omitted, listen for connections only on
+.IR loport .
+The default is to allow the kernel a free choice of local port number.
+.PP
+The
+.I filesystem
+arguments name the filesystems to be frozen.  There must be at least one
+such argument.  It's conventional to name the filesystem mount points,
+though actually any file or directory in the filesystem will do.  The
+files are opened read-only.
+.PP
+The
+.B rfreezefs
+program starts, parses its command line, opens the named files, and
+creates a listening TCP socket according to the command-line options.
+It then prints a sequence of lines to standard output, which may have
+one of the following forms.
+.TP
+.BI "PORT " port
+Announces the TCP
+.I port
+number on which that
+.B rfreezefs
+is listening for incoming connections.
+.TP
+.BI "TOKEN " label " " token
+Declares a `token': a randomly chosen string which is to be used in the
+network connection.  The token's value is
+.IR token :
+token values are a sequence of non-whitespace printable ASCII
+characters, but their precise structure is not specified.  The token
+value will have the meaning given by the
+.IR label ,
+which is one of the token labels described below.
+.TP
+.B READY
+Marks the end of the lines and announces that
+.B rfreezefs
+is ready to accept connections.
+.PP
+These lines may be sent in any order, except that
+.B READY
+is always last.  There may be many
+.B TOKEN
+lines.
+.PP
+Network communications use a simple plain-text line-oriented protocol.
+Each line consists of a token, optionally followed by a carriage return
+(code 13), followed by a linefeed (code 10).  No other whitespace is
+permitted.  The tokens allowed are precisely those announced in the
+.B TOKEN
+lines written to
+.BR rfreezefs 's
+standard output.  Furthermore, only certain tokens are valid at
+particular points in the protocol.  For reference, the token labels, and
+the meanings of the corresponding tokens, are as follows.
+.TP
+.B FREEZE
+Sent by a client to freeze the filesystems.  This must be the first
+token transmitted by the client.  On receipt,
+.B rfreezefs
+will close its listening socket and any other client connections.  It
+will then freeze the filesystems.
+.TP
+.B FROZEN
+Sent by
+.B rfreezefs
+to indicate successful freezing of the filesystem.
+.TP
+.B KEEPALIVE
+Sent periodically by the client to prevent filesystems being thawed due
+to a timeout.  No explicit acknowledgement is sent.
+.TP
+.B THAW
+Sent by the client to request thawing of the filesystems.
+.TP
+.B THAWED
+Sent by
+.BR rfreezefs " to indicate successful thawing of the filesystems in response to"
+.BR THAW .
+.PP
+The high-level structure of the protocol is then as follows: the client
+sends
+.BR FREEZE ;
+the server freezes and responds with
+.BR FROZEN ;
+the client optionally sends
+.B KEEPALIVE
+at intervals; the client finally sends
+.BR THAW ;
+and the server responds with
+.B THAWED
+and drops the connection.
+.PP
+If sufficient time passes without
+.B rfreezefs
+receiving either
+.B THAW
+or
+.B KEEPALIVE
+tokens, or an invalid token is received, or it receives one of a number
+of signals, currently
+.BR SIGINT ,
+.BR SIGQUIT ,
+.BR SIGTERM ,
+.BR SIGHUP ,
+.BR SIGALRM ,
+.BR SIGILL ,
+.BR SIGSEGV ,
+.BR SIGBUS ,
+.BR SIGFPE ,
+or
+.BR SIGABRT ,
+.B rfreezefs
+will thaw the filesystems and report a failure.
+.PP
+Diagnostics are reported to standard error.  Exit statuses have specific
+meanings:
+.TP
+.B 0
+Successful completion.  Filesystems were frozen and thawed as required.
+.TP
+.B 1
+Problem with command-line arguments.  No filesystems were frozen.
+.TP
+.B 2
+Environmental problem, typically a system call failure: e.g., a file
+failed to open, or there was a problem with the network communications.
+Either no filesystems were frozen, or all filesystems were successfully
+thawed again.
+.TP
+.B 3
+Timeout or invalid data.  Either no connections containing the cookie
+were made in time, or no data was received for a long enough period
+after the filesystems were frozen, or an invalid token was received.  In
+the first case, no filesystems were frozen; in the other two cases, the
+filesystems were successfully thawed.
+.TP
+.B 4
+Crash.  The
+.B rfreezefs
+program received a fatal signal after it had started to freeze
+filesystems.  Under these circumstances, it thaws the filesystems,
+removes the signal handler, and sends itself the signal again, but if
+that doesn't work then
+.B rfreezefs
+exits with this status code.  All frozen filesystems were successfully
+thawed again.
+.TP
+.B 112
+Failure during filesystem thaw (mnemonic: European emergency number).
+Some filesystems
+.I failed
+to thaw, and are still frozen.  You might have some joy with
+.BR SysRq-j ,
+though in the author's experience that doesn't work and you'll probably
+have to reboot.  At least your filesystems are consistent...
+.SS Background
+When frozen, a filesystem's backing block device is put in a consistent
+state (as if unmounted), and write operations to it are delayed until
+the filesystem is thawed again.  In the meantime, it's possible to take
+a consistent snapshot of the block device.  When a filesystem is
+directly mounted on an LVM logical volume, the kernel detects this
+situation and automatically freezes the filesystem while the snapshot is
+being prepared.  If the logical volume and filesystem are on separate
+hosts, though, the filesystem must be frozen manually, which is why
+.B rfreezefs
+is useful.
+.PP
+The idea is to run
+.B rfreezefs
+using
+.BR ssh (1)
+or
+.BR userv (1),
+or some other means of acquiring the necessary privilege level.  You
+read the port number and tokens, connect to the socket, and send the
+.B FREEZE
+token followed by a newline.  You now wait to receive the
+.B FROZEN
+token from
+.BR rfreezefs .
+Once you have received this, the filesystems are frozen: you can safely
+take snapshots.  If this will take an extended amount of time, you
+should send
+.B KEEPALIVE
+tokens to the connection at intervals in order to prevent
+.B rfreezefs
+from timing out and thawing the filesystems (but see the
+.B "Security notes"
+below).  When your snapshot is prepared, send the
+.B THAW
+token, and wait for the
+.B THAWED
+token in response.  If this is received, the snapshot was completed
+successfully and the filesystems are properly thawed again.  If you
+don't receive the
+.B THAWED
+token then something bad might have happened (e.g., the filesystem might
+have been prematurely thawed) and the snapshot is suspect.  If the exit
+status is 112 then at least one filesystem is still frozen and some
+emergency action is needed.  If you can't retrieve the exit status then
+it's possible that your transport is blocked for trying to write to the
+frozen filesystem (this is especially likely if
+.B /
+or
+.B /var
+is frozen) and you should react as if the status was 112.
+.SS Security notes
+The
+.B rfreezefs
+program uses randomly chosen tokens to form a simple code which is
+revealed to the caller.  It is assumed that this information is kept
+secret from adversaries, e.g., by ensuring that it is only transmitted
+over local pipes (as used by
+.BR userv (1))
+and/or secure network transports such as SSH (see
+.BR ssh (1)).
+The author believes that the worst possible outcome is that the host
+wedges up because an important filesystem is frozen, and
+.B rfreezefs
+therefore strives to prevent that from happening.  In particular,
+cryptographic transport implementations such as SSH may attempt to log
+messages to frozen filesystems or otherwise wedge themselves:
+.B rfreezefs
+deliberately uses only kernel-implemented transports for its
+communication needs once the filesystems are frozen.
+.PP
+Most of the tokens are used at most once in the protocol.  In
+particular, the
+.B FROZEN
+token can't be sent by an adversary in advance of the filesystem being
+frozen, since (under the assumption that the tokens are kept secret) it
+is only revealed in the clear after a successful freeze.  Similarly, the
+.B THAWED
+token is only transmitted if the filesystems are thawed as a result of a
+.B THAW
+request (rather than a dropped connection, timeout, or some other
+problem).  If the client only sends the
+.B THAW
+request once its snapshot is complete, then a
+.B THAWED
+response indicates that the filesystems remained frozen until the
+snapshot was indeed completed and therefore the snapshot is consistent.
+.PP
+The exception is the
+.B KEEPALIVE
+token, which may be sent repeatedly.  After it is first revealed, an
+adversary can hijack the connection and replay the
+.B KEEPALIVE
+token to keep the filesystems frozen indefinitely.  You can recover from
+this by severing the connection somehow, or by sending
+.B rfreezefs
+a signal.  It is therefore recommended that
+.B KEEPALIVE
+tokens not be sent unless necessary.  The timeout is currently set to
+60s, which ought to be adequate for most snapshot mechanisms.
+.SH BUGS
+There ought to be a better one-time-token protocol for keepalives.  I
+want to keep cryptography out of this program, though.
+.SH SEE ALSO
+.BR fsfreeze (8),
+.BR random (4),
+.BR lvm (8),
+.BR ssh (1),
+.BR userv (1).
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
diff --git a/rfreezefs.c b/rfreezefs.c
new file mode 100644
index 0000000..ae023fb
--- /dev/null
+++ b/rfreezefs.c
@@ -0,0 +1,633 @@
+/* -*-c-*-
+ *
+ * Freeze a file system under remote control
+ *
+ * (c) 2011 Mark Wooding
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/select.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+#include <linux/fs.h>
+
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include <mLib/alloc.h>
+#include <mLib/dstr.h>
+#include <mLib/base64.h>
+#include <mLib/fdflags.h>
+#include <mLib/mdwopt.h>
+#include <mLib/quis.h>
+#include <mLib/report.h>
+#include <mLib/sub.h>
+#include <mLib/tv.h>
+
+/*----- Magic constants ---------------------------------------------------*/
+
+#define COOKIESZ 16			/* Size of authentication cookie */
+#define TO_CONNECT 30			/* Timeout for incoming connection */
+#define TO_KEEPALIVE 60			/* Timeout between keepalives */
+
+/*----- Utility functions -------------------------------------------------*/
+
+static int getuint(const char *p, const char *q)
+{
+  unsigned long i;
+  int e = errno;
+  char *qq;
+  /* Parse the integer in [p, q), or all of p if q is null; die(1) if bad. */
+  if (!q) q = p + strlen(p);
+  errno = 0;
+  i = strtoul(p, &qq, 0);
+  if (errno || qq == p || qq < q || i > INT_MAX)
+    die(1, "invalid integer `%s'", p);
+  errno = e;
+  return ((int)i);
+}
+
+#ifdef DEBUG
+#  define D(x) x			/* Debugging build: keep the code */
+#else
+#  define D(x)				/* Otherwise: compile it away */
+#endif
+
+/*----- Token management --------------------------------------------------*/
+
+struct token {
+  const char *label;			/* Name of the token's role */
+  char tok[(COOKIESZ + 2)*4/3 + 1];	/* Value: Base64 text, plus null */
+};
+
+#define TOKENS(_)			/* The token labels, in order */ \
+  _(FREEZE)								\
+  _(FROZEN)								\
+  _(KEEPALIVE)								\
+  _(THAW)								\
+  _(THAWED)
+
+enum {					/* Token indices into `toktab' */
+#define ENUM(tok) T_##tok,
+  TOKENS(ENUM)
+#undef ENUM
+  T_LIMIT
+};
+
+enum {					/* One flag bit per token index */
+#define MASK(tok) TF_##tok = 1u << T_##tok,
+  TOKENS(MASK)
+#undef MASK
+  TF_ALL = (1u << T_LIMIT) - 1u
+};
+
+static struct token toktab[] = {	/* Values are set by `inittoks' */
+#define INIT(tok) { #tok },
+  TOKENS(INIT)
+#undef INIT
+  { 0 }
+};
+
+static void inittoks(void)
+{
+  struct token *t, *tt;
+  unsigned char buf[COOKIESZ];
+  int fd;
+  ssize_t n;
+  base64_ctx bc;
+  dstr d = DSTR_INIT;
+  /* Fill in a distinct random value for every token in `toktab'. */
+  if ((fd = open("/dev/urandom", O_RDONLY)) < 0)
+    die(2, "open (urandom): %s", strerror(errno));
+  for (t = toktab; t->label; t++) {
+  again:
+    dstr_reset(&d);			/* Also discards a rejected duplicate */
+    n = read(fd, buf, COOKIESZ);
+    if (n < 0) die(2, "read (urandom): %s", strerror(errno));
+    else if (n < COOKIESZ) die(2, "read (urandom): short read");
+    base64_init(&bc);
+    base64_encode(&bc, buf, COOKIESZ, &d);
+    base64_encode(&bc, 0, 0, &d);
+    dstr_putz(&d);
+
+    for (tt = toktab; tt < t; tt++) {
+      if (strcmp(d.buf, tt->tok) == 0)
+	goto again;
+    }
+
+    assert(d.len < sizeof(t->tok));
+    memcpy(t->tok, d.buf, d.len + 1);
+  }
+  close(fd); dstr_destroy(&d);		/* Don't leak these */
+}
+
+struct tokmatch {
+  unsigned tf;				/* Mask of tokens still matching */
+  size_t o;				/* Current offset within the tokens */
+  unsigned f;				/* Flags */
+#define TMF_CR 1u			/*   Carriage return already seen */
+};
+
+static void tokmatch_init(struct tokmatch *tm)
+  { tm->o = 0; tm->f = 0; tm->tf = TF_ALL; }
+
+static int tokmatch_update(struct tokmatch *tm, int ch)
+{
+  const struct token *t;
+  unsigned tf;
+  /* Feed ch to tm; at newline return the matched TF_ flag or -1, else 0. */
+  switch (ch) {
+    case '\n':
+      for (t = toktab, tf = 1; t->label; t++, tf <<= 1) {
+	if ((tm->tf & tf) && !t->tok[tm->o])
+	  return (tf);
+      }
+      return (-1);
+    case '\r':
+      for (t = toktab, tf = 1; t->label; t++, tf <<= 1) {
+	if ((tm->tf & tf) && !t->tok[tm->o] && !(tm->f & TMF_CR))
+	  tm->f |= TMF_CR;
+	else
+	  tm->tf &= ~tf;
+      }
+      break;
+    default:		/* A token which has already ended can't match, even NUL */
+      for (t = toktab, tf = 1; t->label; t++, tf <<= 1) {
+	if ((tm->tf & tf) && (!t->tok[tm->o] || ch != t->tok[tm->o]))
+	  tm->tf &= ~tf;
+      }
+      tm->o++;
+      break;
+  }
+  return (0);
+}
+
+static int writetok(unsigned i, int fd)
+{
+  static const char nl = '\n';
+  const struct token *t = &toktab[i];
+  size_t n = strlen(t->tok);
+  /* Send token number i and a newline to fd; return 0, or -1 with errno. */
+  errno = EIO;				/* Reported for a short write */
+  if (write(fd, t->tok, n) < (ssize_t)n ||
+      write(fd, &nl, 1) < 1)
+    return (-1);
+  return (0);
+}
+
+/*----- Data structures ---------------------------------------------------*/
+
+struct client {
+  struct client *next;			/* Links in the client chain */
+  int fd;				/* File descriptor for socket */
+  struct tokmatch tm;			/* Token matching context */
+};
+
+/*----- Static variables --------------------------------------------------*/
+
+static int *fs;				/* File descriptors for targets */
+static char **fsname;			/* File system names */
+static size_t nfs;			/* Number of descriptors */
+
+/*----- Cleanup -----------------------------------------------------------*/
+
+#define EOM ((char *)0)			/* Ends emerg's argument list */
+static void emerg(const char *msg,...)
+{
+  va_list ap;
+  /* Write QUIS, ": ", the strings up to EOM, and a newline with write(2). */
+#define MSG(m)								\
+  do { const char *m_ = m; if (write(2, m_, strlen(m_))); } while (0)
+
+  va_start(ap, msg);
+  MSG(QUIS); MSG(": ");
+  do {
+    MSG(msg);
+    msg = va_arg(ap, const char *);
+  } while (msg != EOM);
+  MSG("\n");
+  va_end(ap);
+#undef MSG
+}
+
+static void partial_cleanup(size_t n)
+{
+  int i;
+  int bad = 0;
+  /* NOTE(review): `n' is unused; all nfs entries are visited -- confirm. */
+  for (i = 0; i < nfs; i++) {
+    if (fs[i] == -1)
+      emerg("not really thawing ", fsname[i], EOM);
+    else if (fs[i] != -2) {
+      if (ioctl(fs[i], FITHAW, 0)) {
+	emerg("VERY BAD!  failed to thaw ",
+	      fsname[i], ": ", strerror(errno), EOM);
+	bad = 1;
+      }
+      close(fs[i]);
+    }
+    fs[i] = -2;				/* Skipped if we come here again */
+  }
+  if (bad) _exit(112);			/* Some thaw failed */
+}
+
+static void cleanup(void) { partial_cleanup(nfs); }
+
+static int sigcatch[] = {
+  SIGINT, SIGQUIT, SIGTERM, SIGHUP, SIGALRM,
+  SIGILL, SIGSEGV, SIGBUS, SIGFPE, SIGABRT
+};
+
+static void sigmumble(int sig)
+{
+  sigset_t ss;
+  /* Signal handler: thaw everything and report, then die of the signal. */
+  cleanup();
+  emerg(strsignal(sig), EOM);
+  /* Restore the default action, unblock the signal and send it again. */
+  signal(sig, SIG_DFL);
+  sigemptyset(&ss); sigaddset(&ss, sig);
+  sigprocmask(SIG_UNBLOCK, &ss, 0);
+  raise(sig);
+  _exit(4);				/* In case the signal didn't kill us */
+}
+
+/*----- Help functions ----------------------------------------------------*/
+
+static void version(FILE *fp) { pquis(fp, "$, version " VERSION "\n"); }
+static void usage(FILE *fp)
+  { pquis(fp, "Usage: $ [-n] [-a ADDR] [-p LOPORT[-HIPORT]] FILSYS ...\n"); }
+/* NOTE(review): help says `PORT COOKIE'; main prints PORT/TOKEN/READY. */
+static void help(FILE *fp)
+{
+  version(fp); putc('\n', fp);
+  usage(fp);
+  fputs("\n\
+Freezes a filesystem temporarily, with some measure of safety.\n\
+\n\
+The program listens for connections on a TCP port, and prints a line\n\
+\n\
+	PORT COOKIE\n\
+\n\
+to standard output.  You must connect to this PORT and send the COOKIE\n\
+followed by a newline within a short period of time.  The filesystems\n\
+will then be frozen, and `OK' written to the connection.  In order to\n\
+keep the file system frozen, you must keep the connection open, and\n\
+feed data into it.  If the connection closes, or no data is received\n\
+within a set period of time, or the program receives one of a variety\n\
+of signals or otherwise becomes unhappy, the filesystems are thawed again.\n\
+\n\
+Options:\n\
+\n\
+-h, --help			Print this help text.\n\
+-v, --version			Print the program version number.\n\
+-u, --usage			Print a short usage message.\n\
+\n\
+-a, --address=ADDR		Listen only on ADDR.\n\
+-n, --not-really		Don't really freeze or thaw filesystems.\n\
+-p, --port-range=LO[-HI]	Select a port number between LO and HI.\n\
+				  If HI is omitted, choose only LO.\n\
+", fp);
+}
+
+/*----- Main program ------------------------------------------------------*/
+
+int main(int argc, char *argv[])
+{
+  char buf[256];
+  int loport = -1, hiport = -1;
+  int sk, fd, maxfd;
+  struct sockaddr_in sin;
+  socklen_t sasz;
+  struct hostent *h;
+  const char *p, *q;
+  struct timeval now, when, delta;
+  struct client *clients = 0, *c, **cc;
+  const struct token *t;
+  struct tokmatch tm;
+  fd_set fdin;
+  int i;
+  ssize_t n;
+  unsigned f = 0;
+#define f_bogus 0x01u
+#define f_notreally 0x02u
+
+  ego(argv[0]);
+  sub_init();
+
+  /* --- Partially initialize the socket address --- */
+
+  sin.sin_family = AF_INET;
+  sin.sin_addr.s_addr = INADDR_ANY;
+  sin.sin_port = 0;
+
+  /* --- Parse the command line --- */
+
+  for (;;) {
+    static struct option opts[] = {
+      { "help",		0,		0,	'h' },
+      { "version",	0,		0,	'v' },
+      { "usage",	0,		0,	'u' },
+      { "address",	OPTF_ARGREQ,	0,	'a' },
+      { "not-really",	0,		0,	'n' },
+      { "port-range",	OPTF_ARGREQ,	0,	'p' },
+      { 0,		0,		0,	0 }
+    };
+
+    if ((i = mdwopt(argc, argv, "hvua:np:", opts, 0, 0, 0)) < 0) break;
+    switch (i) {
+      case 'h': help(stdout); exit(0);
+      case 'v': version(stdout); exit(0);
+      case 'u': usage(stdout); exit(0);
+      case 'a':
+	if ((h = gethostbyname(optarg)) == 0) {
+	  die(1, "failed to resolve address `%s': %s",
+	      optarg, hstrerror(h_errno));
+	}
+	if (h->h_addrtype != AF_INET)
+	  die(1, "unexpected address type resolving `%s'", optarg);
+	assert(h->h_length == sizeof(sin.sin_addr));
+	memcpy(&sin.sin_addr, h->h_addr, sizeof(sin.sin_addr));
+	break;
+      case 'n': f |= f_notreally; break;
+      case 'p':
+	if ((p = strchr(optarg, '-')) == 0)
+	  loport = hiport = getuint(optarg, 0);
+	else {
+	  loport = getuint(optarg, p);
+	  hiport = getuint(p + 1, 0);
+	}
+	break;
+      default: f |= f_bogus; break;
+    }
+  }
+  if (f & f_bogus) { usage(stderr); exit(1); }
+  if (optind >= argc) { usage(stderr); exit(1); }
+
+  /* --- Open the file systems --- */
+
+  nfs = argc - optind;
+  fsname = &argv[optind];
+  fs = xmalloc(nfs*sizeof(*fs));
+  for (i = 0; i < nfs; i++) {
+    if ((fs[i] = open(fsname[i], O_RDONLY)) < 0)
+      die(2, "open (%s): %s", fsname[i], strerror(errno));
+  }
+
+  if (f & f_notreally) {
+    for (i = 0; i < nfs; i++) {
+      close(fs[i]);
+      fs[i] = -1;			/* Marks a dry-run target */
+    }
+  }
+
+  /* --- Generate random tokens --- */
+
+  inittoks();
+
+  /* --- Create the listening socket --- */
+
+  if ((sk = socket(PF_INET, SOCK_STREAM, 0)) < 0)
+    die(2, "socket: %s", strerror(errno));
+  i = 1;
+  if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i)))
+    die(2, "setsockopt (reuseaddr): %s", strerror(errno));
+  if (fdflags(sk, O_NONBLOCK, O_NONBLOCK, FD_CLOEXEC, FD_CLOEXEC))
+    die(2, "fdflags: %s", strerror(errno));
+  if (loport < 0 || loport == hiport) {
+    if (loport >= 0) sin.sin_port = htons(loport);
+    if (bind(sk, (struct sockaddr *)&sin, sizeof(sin)))
+      die(2, "bind: %s", strerror(errno));
+  } else if (hiport != loport) {
+    for (i = loport; i <= hiport; i++) {
+      sin.sin_port = htons(i);
+      if (bind(sk, (struct sockaddr *)&sin, sizeof(sin)) >= 0) break;
+      else if (errno != EADDRINUSE)
+	die(2, "bind: %s", strerror(errno));
+    }
+    if (i > hiport) die(2, "bind: all ports in use");
+  }
+  if (listen(sk, 5)) die(2, "listen: %s", strerror(errno));
+
+  /* --- Tell the caller how to connect to us, and start the timer --- */
+
+  sasz = sizeof(sin);
+  if (getsockname(sk, (struct sockaddr *)&sin, &sasz))
+    die(2, "getsockname (listen): %s", strerror(errno));
+  printf("PORT %d\n", ntohs(sin.sin_port));
+  for (t = toktab; t->label; t++)
+    printf("TOKEN %s %s\n", t->label, t->tok);
+  printf("READY\n");
+  if (fflush(stdout) || ferror(stdout))
+    die(2, "write (stdout, rubric): %s", strerror(errno));
+  gettimeofday(&now, 0); TV_ADDL(&when, &now, TO_CONNECT, 0);
+
+  /* --- Collect incoming connections, and check for the cookie --- *
+   *
+   * This is the tricky part.
+   */
+
+  for (;;) {
+    FD_ZERO(&fdin);
+    FD_SET(sk, &fdin);
+    maxfd = sk;
+    for (c = clients; c; c = c->next) {
+      FD_SET(c->fd, &fdin);
+      if (c->fd > maxfd) maxfd = c->fd;
+    }
+    TV_SUB(&delta, &when, &now);
+    if (select(maxfd + 1, &fdin, 0, 0, &delta) < 0)
+      die(2, "select (accept): %s", strerror(errno));
+    gettimeofday(&now, 0);
+
+    if (TV_CMP(&now, >=, &when)) die(3, "timeout (accept)");
+
+    if (FD_ISSET(sk, &fdin)) {
+      sasz = sizeof(sin);
+      fd = accept(sk, (struct sockaddr *)&sin, &sasz);
+      if (fd >= 0) {
+	if (fdflags(fd, O_NONBLOCK, O_NONBLOCK, FD_CLOEXEC, FD_CLOEXEC) < 0)
+	  die(2, "fdflags: %s", strerror(errno));
+	c = CREATE(struct client);
+	c->next = clients; c->fd = fd; tokmatch_init(&c->tm);
+	clients = c;
+      }
+#ifdef DEBUG
+      else if (errno != EAGAIN)
+	moan("accept: %s", strerror(errno));
+#endif
+    }
+
+    for (cc = &clients; *cc;) {
+      c = *cc;
+      if (!FD_ISSET(c->fd, &fdin)) goto next_client;
+      n = read(c->fd, buf, sizeof(buf));
+      if (!n) goto disconn;
+      else if (n < 0) {
+	if (errno == EAGAIN) goto next_client;
+	D( moan("read (client; auth): %s", strerror(errno)); )
+	goto disconn;
+      } else {
+	for (p = buf, q = p + n; p < q; p++) {
+	  switch (tokmatch_update(&c->tm, *p)) {
+	    case 0: break;
+	    case TF_FREEZE: goto connected;
+	    default:
+	      D( moan("bad token from client"); )
+	      goto disconn;
+	  }
+	}
+      }
+
+    next_client:
+      cc = &c->next;
+      continue;
+
+    disconn:
+      close(c->fd);
+      *cc = c->next;
+      DESTROY(c);
+      continue;
+    }
+  }
+
+connected:
+  close(sk); sk = c->fd;		/* Keep the client that sent the token */
+  while (clients) {
+    if (clients->fd != sk) close(clients->fd);
+    c = clients->next;
+    DESTROY(clients);
+    clients = c;
+  }
+
+  /* --- Establish signal handlers --- *
+   *
+   * Hopefully this will prevent bad things happening if we have an accident.
+   */
+
+  for (i = 0; i < sizeof(sigcatch)/sizeof(sigcatch[0]); i++) {
+    if (signal(sigcatch[i], sigmumble) == SIG_ERR)
+      die(2, "signal (%d): %s", sigcatch[i], strerror(errno));
+  }
+  atexit(cleanup);
+
+  /* --- Prevent the OOM killer from clobbering us --- */
+
+  if ((fd = open("/proc/self/oom_adj", O_WRONLY)) < 0 ||
+      write(fd, "-17\n", 4) < 4 ||
+      close(fd))
+    die(2, "set oom_adj: %s", strerror(errno));
+
+  /* --- Actually freeze the filesystem --- */
+
+  for (i = 0; i < nfs; i++) {
+    if (fs[i] == -1)
+      moan("not really freezing %s", fsname[i]);
+    else {
+      if (ioctl(fs[i], FIFREEZE, 0) < 0) {
+	partial_cleanup(i);
+	die(2, "ioctl (freeze %s): %s", fsname[i], strerror(errno));
+      }
+    }
+  }
+  if (writetok(T_FROZEN, sk)) {
+    cleanup();
+    die(2, "write (frozen): %s", strerror(errno));
+  }
+
+  /* --- Now wait for the other end to detach --- */
+
+  tokmatch_init(&tm);
+  TV_ADDL(&when, &now, TO_KEEPALIVE, 0);
+  for (p++; p < q; p++) {		/* Bytes read after the freeze token */
+    switch (tokmatch_update(&tm, *p)) {
+      case 0: break;
+      case TF_KEEPALIVE: tokmatch_init(&tm); break;
+      case TF_THAW: goto done;
+      default: cleanup(); die(3, "unknown token (keepalive)");
+    }
+  }
+  for (;;) {
+    FD_ZERO(&fdin);
+    FD_SET(sk, &fdin);
+    TV_SUB(&delta, &when, &now);
+    if (select(sk + 1, &fdin, 0, 0, &delta) < 0) {
+      cleanup();
+      die(2, "select (keepalive): %s", strerror(errno));
+    }
+
+    gettimeofday(&now, 0);
+    if (TV_CMP(&now, >, &when)) {
+      cleanup(); die(3, "timeout (keepalive)");
+    }
+    if (FD_ISSET(sk, &fdin)) {
+      n = read(sk, buf, sizeof(buf));
+      if (!n) { cleanup(); die(3, "end-of-file (keepalive)"); }
+      else if (n < 0) {
+	if (errno == EAGAIN) ;
+	else {
+	  cleanup();
+	  die(2, "read (client, keepalive): %s", strerror(errno));
+	}
+      } else {
+	for (p = buf, q = p + n; p < q; p++) {
+	  switch (tokmatch_update(&tm, *p)) {
+	    case 0: break;
+	    case TF_KEEPALIVE:
+	      TV_ADDL(&when, &now, TO_KEEPALIVE, 0);
+	      tokmatch_init(&tm);
+	      break;
+	    case TF_THAW:
+	      goto done;
+	    default:
+	      cleanup();
+	      die(3, "unknown token (keepalive)");
+	  }
+	}
+      }
+    }
+  }
+
+done:
+  cleanup();
+  if (writetok(T_THAWED, sk))
+    die(2, "write (thaw): %s", strerror(errno));
+  close(sk);
+  return (0);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/rmt.c b/rmt.c
new file mode 100644
index 0000000..8861686
--- /dev/null
+++ b/rmt.c
@@ -0,0 +1,449 @@
+/* -*-c-*-
+ *
+ * Fake rmt(8) server for hashing and storing files
+ *
+ * (c) 2010 Mark Wooding
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <getopt.h>
+
+#include <pwd.h>
+
+#include <mLib/dstr.h>
+#include <mLib/quis.h>
+#include <mLib/report.h>
+
+#include <nettle/sha.h>
+
+/*----- Configuration -----------------------------------------------------*/
+
+#ifndef BKP
+#  define BKP "/mnt/bkp"
+#endif
+
+/*----- Main code ---------------------------------------------------------*/
+
+#define BUFSZ 10240
+#define LSEEK_GET_TAPEPOS 10
+#define LSEEK_GO2_TAPEPOS 11
+
+struct flag {
+  const char *name;			/* Flag name, minus the `O_' prefix */
+  int f;				/* Corresponding open(2) flag bits */
+};
+
+static const struct flag openflag[] = {
+  /* ;;; Emacs Lisp to generate the table below.  Place your cursor just
+     ;;; after the closing `)' and press C-x C-e.
+
+     (let ((flags '(rdonly wronly rdwr creat excl trunc nonblock ndelay
+		    noctty append dsync rsync sync cloexec async
+		    direct noatime nofollow shlock exlock defer)))
+       (save-excursion
+	 (goto-char (point-min))
+	 (search-forward (concat "***" "BEGIN openflag" "***"))
+	 (beginning-of-line 2)
+	 (delete-region (point)
+			(progn
+			  (search-forward "***END***")
+			  (beginning-of-line)
+			  (point)))
+       (dolist (f (sort (copy-list flags) #'string<))
+	 (let ((up (upcase (symbol-name f))))
+	   (insert (format "#ifdef O_%s\n" up))
+	   (insert (format "  { \"%s\", O_%s },\n" up up))
+	   (insert "#endif\n")))))
+  */
+  /***BEGIN openflag***/
+#ifdef O_APPEND
+  { "APPEND", O_APPEND },
+#endif
+#ifdef O_ASYNC
+  { "ASYNC", O_ASYNC },
+#endif
+#ifdef O_CLOEXEC
+  { "CLOEXEC", O_CLOEXEC },
+#endif
+#ifdef O_CREAT
+  { "CREAT", O_CREAT },
+#endif
+#ifdef O_DEFER
+  { "DEFER", O_DEFER },
+#endif
+#ifdef O_DIRECT
+  { "DIRECT", O_DIRECT },
+#endif
+#ifdef O_DSYNC
+  { "DSYNC", O_DSYNC },
+#endif
+#ifdef O_EXCL
+  { "EXCL", O_EXCL },
+#endif
+#ifdef O_EXLOCK
+  { "EXLOCK", O_EXLOCK },
+#endif
+#ifdef O_NDELAY
+  { "NDELAY", O_NDELAY },
+#endif
+#ifdef O_NOATIME
+  { "NOATIME", O_NOATIME },
+#endif
+#ifdef O_NOCTTY
+  { "NOCTTY", O_NOCTTY },
+#endif
+#ifdef O_NOFOLLOW
+  { "NOFOLLOW", O_NOFOLLOW },
+#endif
+#ifdef O_NONBLOCK
+  { "NONBLOCK", O_NONBLOCK },
+#endif
+#ifdef O_RDONLY
+  { "RDONLY", O_RDONLY },
+#endif
+#ifdef O_RDWR
+  { "RDWR", O_RDWR },
+#endif
+#ifdef O_RSYNC
+  { "RSYNC", O_RSYNC },
+#endif
+#ifdef O_SHLOCK
+  { "SHLOCK", O_SHLOCK },
+#endif
+#ifdef O_SYNC
+  { "SYNC", O_SYNC },
+#endif
+#ifdef O_TRUNC
+  { "TRUNC", O_TRUNC },
+#endif
+#ifdef O_WRONLY
+  { "WRONLY", O_WRONLY },
+#endif
+  /***END***/
+  { 0, 0 }				/* Sentinel: a null name ends the table */
+};
+
+int main(int argc, char *argv[])
+{
+  int ch;
+  dstr d = DSTR_INIT, dd = DSTR_INIT;
+  unsigned char buf[BUFSZ];
+  int fd = -1, hfd = -1;
+  off_t rc;
+  off_t off = 0;
+  struct passwd *pw;
+  uid_t u;
+  unsigned f = 0;
+#define f_bogus 1u
+  const char *p = 0;
+  const char *bkp = 0, *host = 0;
+  struct sha256_ctx hc;
+
+  ego(argv[0]);
+  setvbuf(stdin, 0, _IONBF, 0);
+  signal(SIGPIPE, SIG_IGN);
+
+  for (;;) {
+    int o = getopt(argc, argv, "H:r:");
+    if (o < 0) break;
+    switch (o) {
+      case 'H': host = optarg; break;
+      case 'r': bkp = optarg; break;
+      default: f |= f_bogus; break;
+    }
+  }
+  argc -= optind; argv += optind;
+  if ((f & f_bogus) || argc) {
+    pquis(stderr, "usage: $ [-r ROOT] [-H HOST]\n");
+    exit(1);
+  }
+
+  if (!bkp) bkp = getenv("BKP");
+  if (!bkp) bkp = BKP;
+
+  if (!host) host = getenv("BKP_HOST");
+
+  if (!host) {
+    p = getenv("USER");
+    if (!p) p = getenv("LOGNAME");
+    if (!p) {
+      u = getuid();
+      if ((pw = getpwuid(u)) == 0)
+	die(1, "no passwd entry (you don't exist?)");
+      p = pw->pw_name;
+    }
+    if (strncmp(p, "bkp-", 4) != 0) {
+      die(1, "can't deduce host name: "
+	  "login name `%s' doesn't begin with `bkp-'",
+	  p);
+    }
+    host = p + 4;
+  }
+
+  for (;;) {
+    if (fflush(stdout)) goto fail;
+    ch = getchar();
+    if (ch == EOF) break;
+    DRESET(&d); DRESET(&dd); rc = 0;
+
+#define CHECKFD do { if (fd < 0) goto badf; } while (0)
+#define ERROR(what) do moan(what ": %s", strerror(errno)); while (0)
+#define ERROR1(what, arg) \
+  do moan(what ": %s", arg, strerror(errno)); while (0)
+#define ARG(d) do {							\
+  if (dstr_putline(&d, stdin) == EOF || ferror(stdin) || feof(stdin))	\
+    { moan("read (stdin)", strerror(errno)); goto fail; }		\
+} while (0)
+
+#define SKIPWS do { while (isspace((unsigned char)*p)) p++; } while (0)
+
+    switch (ch) {
+
+      case 'O': {
+	/* Ofile\nmode\n -- open file */
+
+	const struct flag *ff;
+	char *p, *q;
+	size_t n;
+	long mode, f;
+
+	ARG(d); ARG(dd);
+	if (fd >= 0 && close(fd)) ERROR("close (fd)");
+	if (hfd >= 0 && close(hfd)) ERROR("close (hash)");
+	fd = hfd = -1;
+
+	if (chdir(bkp) || chdir(host)) ERROR1("chdir (%s)", host);
+	p = d.buf;
+	if ((q = strchr(p, '/')) == 0)
+	  { moan("bad path: missing `/')"); goto inval; }
+	*q++ = 0;
+	if (chdir(p) || chdir("prepare/incoming")) ERROR1("chdir (%s)", p);
+
+	memmove(d.buf, q, d.len - (q - d.buf) + 1);
+	d.len -= q - d.buf;
+
+	errno = 0;
+	mode = strtol(dd.buf, &p, 0);
+	if (errno) ERROR("bad mode");
+	else if (mode < 0 || mode > INT_MAX)
+	  { moan("bad mode: range"); goto range; }
+	SKIPWS;
+	if (!*p) {
+	  switch (mode & O_ACCMODE) {
+	    case O_RDONLY: mode = O_RDONLY; break;
+	    case O_WRONLY: mode = O_WRONLY | O_TRUNC | O_CREAT; break;
+	    case O_RDWR: mode = O_RDWR;
+	    default: moan("bad mode: unknown access type"); goto inval;
+	  }
+	} else {
+	  mode = 0;
+	  for (;;) {
+	    if (p[0] == 'O' && p[1] == '_') {
+	      p += 2;
+	      n = strcspn(p, " \t|");
+	      for (ff = openflag; ff->name; ff++) {
+		if (strncmp(p, ff->name, n) == 0 && !ff->name[n])
+		  goto ofmatch;
+	      }
+	      moan("bad mode: unknown flag O_%.*s", (int)n, p);
+	      goto inval;
+	    ofmatch:
+	      mode |= ff->f;
+	      p += n;
+	    } else if (isdigit((unsigned long)*p)) {
+	      errno = 0;
+	      f = strtol(p, &p, 0);
+	      if (errno) ERROR("bad mode");
+	      else if (f < 0 || f > INT_MAX)
+		{ moan("bad mode: range"); goto range; }
+	      mode |= f;
+	    } else {
+	      moan("bad mode: unexpected token");
+	      goto inval;
+	    }
+	    SKIPWS;
+	    if (!*p)
+	      break;
+	    else if (*p != '|') {
+	      moan("bad mode: expected `|'");
+	      goto inval;
+	    }
+	    p++;
+	    SKIPWS;
+	  }
+	}
+
+	if ((fd = open(d.buf, mode, 0666)) < 0) ERROR1("open (%s)", d.buf);
+	if ((mode & O_ACCMODE) == O_WRONLY) {
+	  DPUTS(&d, ".hash");
+	  if ((hfd = open(d.buf,
+			 mode & (O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC),
+			 0666)) < 0) {
+	    close(fd); fd = -1;
+	    ERROR1("open (%s)", d.buf);
+	  }
+	  sha256_init(&hc);
+	}
+	off = 0;
+      } break;
+
+      case 'C': {
+	/* Chunoz\n -- close file */
+
+	uint8_t h[SHA256_DIGEST_SIZE], *p;
+	char hex[SHA256_DIGEST_SIZE * 2 + 1], *q;
+	unsigned i;
+
+	ARG(d); CHECKFD;
+	if (close(fd)) ERROR("close (fd)");
+	fd = -1;
+	if (hfd >= 0) {
+	  sha256_digest(&hc, sizeof(h), h);
+	  for (p = h, q = hex; p < h + sizeof(h); p++, q += 2)
+	    sprintf(q, "%02x", *p);
+	  *q++ = '\n';
+	  errno = EIO;
+	  if (write(hfd, hex, sizeof(hex)) < sizeof(hex) || close(hfd))
+	    ERROR("close (hash)");
+	  hfd = -1;
+	}
+      } break;
+
+      case 'L': {
+	/* Loffset\nwhence\n -- seek
+	 *  (warning: the manual page gets these the wrong way round)
+	 */
+
+	int whence;
+	off_t offset;
+
+	ARG(d); ARG(dd); CHECKFD;
+	offset = atoi(d.buf); whence = strtoull(dd.buf, 0, 0);
+	switch (whence) {
+	  case LSEEK_GET_TAPEPOS: whence = SEEK_CUR; offset = 0; break;
+	  case LSEEK_GO2_TAPEPOS: whence = SEEK_SET; break;
+	}
+	switch (whence) {
+	  case SEEK_CUR:
+	    if (!offset) { rc = off; break; }
+	  default:
+	    rc = lseek(fd, offset, whence);
+	    if (rc == (off_t)-1) ERROR("seek");
+	    off = rc;
+	    break;
+	}
+      } break;
+
+      case 'W': {
+	/* Wlen\ndata... -- write */
+
+	size_t n, nn, sz;
+	ssize_t ssz;
+	unsigned char *p;
+	int botch = 0;
+
+	ARG(d); CHECKFD;
+	rc = sz = strtoul(d.buf, 0, 0);
+	while (sz) {
+	  nn = sz > BUFSZ ? BUFSZ : sz;
+	  n = fread(buf, 1, nn, stdin);
+	  if (n < nn) {
+	    if (feof(stdin)) { moan("eof on stdin"); goto fail;}
+	    else ERROR("read (stdin)");
+	  }
+	  if (hfd >= 0) sha256_update(&hc, n, buf);
+	  p = buf;
+	  while (!botch && n) {
+	    if ((ssz = write(fd, p, n)) > 0) {
+	      p += ssz; off += ssz; n -= ssz;
+	    } else if (!ssz) { moan("zero-length write"); goto fail; }
+	    else if (errno != EINTR) { botch = errno; }
+	  }
+	  sz -= nn;
+	}
+	if (botch) { errno = botch; ERROR("write"); }
+      } break;
+
+      case 'R': {
+	/* Rlen\n -- read */
+
+	size_t nn;
+	ssize_t ssz;
+
+	ARG(d); CHECKFD;
+	nn = strtoul(d.buf, 0, 0); if (nn > BUFSZ) nn = BUFSZ;
+	if ((ssz = read(fd, buf, nn)) < 0) ERROR("read");
+	off += ssz;
+	printf("A%ld\n", (long)ssz);
+	if (fwrite(buf, 1, ssz, stdout) < ssz)
+	  { moan("write (stdout): %s", strerror(errno)); goto fail; }
+	continue;
+      } break;
+
+      case 'i': case 'I':
+	/* Iop\ncount\n -- ioctl */
+	ARG(d); ARG(dd); CHECKFD; goto notty;
+
+      case 'S':
+	/* S -- ioctl */
+	CHECKFD; goto notty;
+      case 's':
+	/* sop -- ioctl */
+
+	if ((ch = getchar()) == EOF) goto fail;
+	CHECKFD; goto notty;
+
+      default:
+	goto fail;
+    }
+
+    printf("A%llu\n", (unsigned long long)rc);
+    continue;
+
+  badf: errno = EBADF; goto error;
+  range: errno = ERANGE; goto error;
+  inval: errno = EINVAL; goto error;
+  notty: errno = ENOTTY; goto error;
+  error:
+    printf("E%d\n%s\n", errno, strerror(errno));
+    continue;
+  }
+  if (fflush(stdout) || ferror(stdout) || ferror(stdin)) goto fail;
+  return (0);
+fail:
+  return (1);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/snap.8.in b/snap.8.in
new file mode 100644
index 0000000..fe1070d
--- /dev/null
+++ b/snap.8.in
@@ -0,0 +1,172 @@
+.TH snap 8 "6 November 2011" "distorted.org.uk backup"
+.SH NAME
+snap \- create and remove snapshot devices
+.SH SYNOPSIS
+.B snap
+.RB [ \-u ]
+.RB [ \-c
+.IR file ]
+.I device
+.RI [ key \c
+.BI = value
+\&...]
+.SH DESCRIPTION
+The
+.B snap
+utility manages device-level snapshots in a mechanism-independent way.
+It's intended to be used as part of automated filesystem maintenance
+activities, such as backups or online filesystem checking.
+.PP
+The command line options are as follows.
+.TP
+.B "\-h, \-\-help"
+Print a help message to standard output and exit with status zero.
+.TP
+.B "\-v, \-\-version"
+Print the program's version number to standard output and exit with
+status zero.
+.TP
+.BI "\-c, \-\-config-file=" file
+Read configuration from
+.I
+file
+rather than
+.BR @sysconfdir@/snaptab .
+.TP
+.B "\-u, \-\-unsnap"
+Remove a snapshot, rather than creating a new one.  Strictly speaking,
+this just passes the option
+.B op=unsnap
+to the handler, though the conventional interpretation is to remove the
+snapshot.
+.SS Operation
+An
+.B op
+option is synthesized from the command-line options.  Specifically, if the
+.B \-u
+option was given, then
+.B op=unsnap
+is set; otherwise
+.B op=snap
+is assumed.
+.PP
+The
+.B snap
+program looks up the
+.I device
+in the configuration file \(en either
+.B @sysconfdir@/snaptab
+or the
+.I file
+named by the
+.B \-c
+option \(en and retrieves a snapshot
+.I type
+and some options.  See
+.BR snaptab (5)
+for the details of the file format and the process of constructing the
+options list.  The
+.I device
+is usually the name of a device node, relative to
+.BR /dev ,
+though specific handler programs may have their own conventions.
+.PP
+The option list from the configuration file, the synthesized
+.B op
+option, and the command-line option list, are concatenated, in that
+order; then, if the resulting list contains two or more options with the
+same
+.I key
+then only the last is retained.  (The
+.I key
+is the portion of the option before the first
+.RB ` = '
+character.)
+.PP
+Finally, the snapshot handler for the selected
+.I type
+is invoked, as
+.IP
+.BI @snaplibexecdir@/snap. type
+.I device
+.IR key = value
+\&...
+.SS Handler conventions
+Much of the behaviour of
+.B snap
+is left up to individual type-specific handler programs.  In order to
+maintain consistency, the following conventions are adopted.
+.PP
+Options are processed strictly left-to-right.  Each option is parsed as
+.IP
+.IR key [\fB. type ]\fB= value
+.PP
+where the
+.I key
+and
+.I type
+do not contain
+.RB ` = '
+characters, and the
+.I type
+does not contain a
+.RB ` . '
+character.  If the
+.I type
+is omitted, or is equal to the handler's type, then the option is
+processed; the
+.I type
+suffix is otherwise ignored; an error is reported if the
+.I key
+is unrecognized.  Options bearing a different type are silently ignored.
+If the same
+.I key
+occurs more than once, only the last occurrence is significant.
+.PP
+Two options are always recognized.
+.TP
+.B op
+Synthesized by the
+.B snap
+program.  The value is
+.B snap
+if a snapshot is to be created, or
+.B unsnap
+if it is to be removed (the
+.B \-u
+option was given).  It is permissible for handlers to define meanings
+for other
+.B op
+values; unrecognized values are an error.
+.TP
+.B tag
+A short arbitrary string which is assigned to the snapshot to
+distinguish it from snapshots created by other clients.  The acceptable
+form for the tag may vary with the snapshot type, but alphanumerics and
+hyphens are always allowed.  This option may be omitted, though this is
+discouraged in scripted use: it is not specified whether this is
+equivalent to providing some default tag.
+.PP
+If the
+.B op
+is
+.B snap
+then the handler should print the (full) pathname of the block device
+containing the snapshot to standard output, followed by a newline.  If
+.B op
+is
+.B unsnap
+then the handler should print nothing to standard output.  Other
+.B op
+values may cause the handler to produce output at its discretion.
+.SH BUGS
+The
+.B snap
+program doesn't even try to handle filesystem-level snapshots, as you'd
+get in ZFS or BtrFS.  Trying to do both device- and filesystem-level
+snapshots in one program leads to all sorts of difficulties, and it's
+probably a mistake to try.  The distinction is still annoying, though.
+.SH SEE ALSO
+.BR snaptab (5).
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
diff --git a/snap.in b/snap.in
new file mode 100755
index 0000000..34e0be5
--- /dev/null
+++ b/snap.in
@@ -0,0 +1,150 @@
+#! @PERL@
+### -*-perl-*-
+###
+### Create and remove snapshots of block devices
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
+use Text::ParseWords;
+
+our $VERSION = "@VERSION@";
+
+our %C = ( etc		=> "@sysconfdir@",
+	   sbin		=> "@sbindir@",
+	   snap		=> "@snaplibexecdir@" );
+
+###--------------------------------------------------------------------------
+### Utilities.
+
+(our $QUIS = $0) =~ s:^.*/::;		# Program name, sans directory
+sub whine ($) { my ($msg) = @_; print STDERR "$QUIS: $msg\n"; }
+sub fail ($) { my $rc = $! || ($? >> 8) || 255; whine $_[0]; exit $rc; }
+
+###--------------------------------------------------------------------------
+### Parse command line.
+
+our $USAGE = "usage: $QUIS [-u] [-c FILE] DEVICE [KEY=VALUE ...]";
+sub version { print "$QUIS, version $VERSION\n"; }
+sub help {
+  print <<EOF;
+$USAGE
+
+Options:
+  -h, --help		Show this help text.
+  -v, --version		Show the program version number.
+  -c, --config=FILE	Use configuration FILE, not $CONF.
+  -n, --no-act		Don't actually do anything; show what would be done.
+  -u, --unsnap		Remove a snapshot taken earlier.
+EOF
+}
+
+our $CONF = "$C{etc}/snaptab";		# Configuration file to read
+our $OP = "snap";			# Value for the synthesized `op' option
+our $NOACT = 0;				# Set by -n: show the command, don't run it
+GetOptions('help|h|?'		=> sub { version; help; exit; },
+	   'version|v'		=> sub { version; exit; },
+	   'config-file|c=s'	=> \$CONF,
+	   'no-act|n'		=> \$NOACT,
+	   'unsnap|u'		=> sub { $OP = "unsnap"; })
+  and @ARGV >= 1
+  or do { print STDERR $USAGE, "\n"; exit 1; };
+
+our $DEV = shift;			# Device to look up; rest of @ARGV is options
+our $TYPE = undef;			# Snapshot type, set from a `type=' option
+
+###--------------------------------------------------------------------------
+### Parse the configuration file.
+
+open CF, "<", $CONF or fail "open config ($CONF): $!";
+our @KV = ();				# Options collected for $DEV, in order
+our %DEF = ();				# Default options, keyed by snapshot type
+while (my $line = <CF>) {
+  chomp $line;
+  while ($line =~ /\\\s*$/) {		# Splice backslash-continued lines
+    chomp (my $more = <CF>);
+    $line =~ s/\\\s*$/$more/;
+  }
+  next if $line =~ /^\s*(\#|$)/;
+  my ($dev, $type, @opts) = shellwords $line;
+  my @nopts = ();
+  for my $i (@opts) {
+    if ($i !~ /^\*\.(.+)$/) { push @nopts, $i; next; }
+    my $ty = $1;			# `*.TYPE': pull in TYPE's defaults
+    for my $o (@{$DEF{$ty}}) {
+      $o =~ /^([^=]+)=(.*)$/ or do { push @nopts, $o; next; };
+      my ($k, $v, $t) = ($1, $2, $ty);	# $t is per-option; $ty stays fixed
+      ($k, $t) = ($1, $2) if $k =~ /^(.+)\.([^.]+)/;
+      push @nopts, "$k.$t=$v";
+    }
+  }
+  @opts = @nopts;
+  if ($dev eq "*") { push @{$DEF{$type}}, @opts; }
+  elsif ($dev eq $DEV) { push @KV, "type=$type", @{$DEF{$type}}, @opts; }
+}
+close CF or fail "close config ($CONF): $!";
+
+###--------------------------------------------------------------------------
+### Pick out the winning options.
+
+our @OPT = ();
+my %seen = ();				# Keys already taken by a later option
+## Walk the list backwards so that the last setting of each key wins.
+for my $i (reverse @KV, "op=$OP", @ARGV) {
+  $i =~ /^([^=]+)=(.*)$/ or fail "malformed option `$i': missing `='";
+  my ($k, $v) = ($1, $2);
+  unless (exists $seen{$k}) {
+    $seen{$k} = 1;
+    if ($k eq "type") { $TYPE = $v; }
+    else { push @OPT, "$k=$v"; }
+  }
+}
+
+defined $TYPE or fail "no snapshot type for device `$DEV'";
+@OPT = reverse @OPT;
+
+###--------------------------------------------------------------------------
+### Invoke the type-specific handler.
+
+## Fix up the path, to make sure our tools are available.
+my $path = $ENV{PATH};
+my %path = map { $_ => 1 } split /:/, $path;
+for my $p (qw( /bin /sbin /usr/bin /usr/sbin ), $C{sbin}) {
+  $path = "$p:$path" unless $path{$p}++;	# Prepend each directory once only
+}
+$ENV{PATH} = $path;
+
+## Prepare the arguments.
+my @args = ("$C{snap}/snap.$TYPE", $DEV, @OPT);
+
+## Do the job.
+if ($NOACT) {
+  whine "run " . join(" ",
+		      map { "`$_'" }
+		      grep { s/'/\\'/g; 1 }
+		      (my @x = @args));
+} else {
+  exec @args;
+  fail "exec (snap.$TYPE): $!";
+}
+
+###----- That's all, folks --------------------------------------------------
+
+exit 0;
diff --git a/snap.lvm b/snap.lvm
new file mode 100755
index 0000000..65d7d9f
--- /dev/null
+++ b/snap.lvm
@@ -0,0 +1,105 @@
+#! /bin/sh
+###
+### Establish snapshots of LVM logical volumes
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+set -e
+quis=${0##*/}
+
+###--------------------------------------------------------------------------
+### Parse the command line.
+
+## Provide help or version information.
+usage="usage: $quis DEVICE [KEY=VALUE ...]"
+version="$quis, version 1.0.0"
+case "$#,$1" in
+  0,*) echo >&2 "$usage"; exit 1 ;;
+  *,-v | *,--version) echo "$version"; exit ;;
+  *,-h | *,--help)
+    cat <<EOF
+$version
+$usage
+
+Option keys:
+  op=OPERATION          \`snap' to create snapshot, or \`unsnap' to remove.
+  snapsz=SIZE		Size to reserve for snapshot storage.
+  tag=TAG               Disambiguation tag to append to logical volume name.
+EOF
+    exit
+    ;;
+esac
+
+## Scan the option keys.
+dev=$1; shift
+Oop=snap Otag=snap Osnapsz=100M
+win=t
+for i in "$@"; do
+  case "$i" in
+    ?*=*) ;;
+    *) echo >&2 "$quis: malformed option \`$i'"; exit 1 ;;
+  esac
+  k=${i%%=*} v=${i#*=}
+  case "$k" in *.lvm) k=${k%.lvm} ;; ?*.?*) continue ;; esac
+  case "$k" in
+    op | tag | snapsz) eval "O$k=\$v" ;;
+    *) echo >&2 "$quis: unknown option \`$k'"; win=nil ;;
+  esac
+done
+case $win in nil) exit 1 ;; esac
+
+## Split the device name into volume-group and logical-volume parts.
+case "$dev" in
+  ?*/?*) vg=${dev%%/*} lv=${dev#*/} ;;
+  *) echo >&2 "$quis: device \`$dev' should be VG/LV"; exit 1 ;;
+esac
+
+###--------------------------------------------------------------------------
+### Take or remove the snapshot.
+
+case "$Oop" in
+
+  snap)
+    ## A size containing a `%' is passed to `lvcreate' as `--extents';
+    ## anything else as `--size'.  Print the new volume's name when done.
+    szarg=size
+    case "$Osnapsz" in *%*) szarg=extents ;; esac
+    snap=$lv.$Otag
+    lvcreate --snapshot \
+      --$szarg="$Osnapsz" --name="$snap" "$vg/$lv" \
+      >/dev/null
+    echo "$vg/$snap"
+    ;;
+
+  unsnap)
+    ## Removing LVM snapshots directly is fraught, chiefly because of races
+    ## with udev, so leave the job to the helper script which knows how to
+    ## cope with them.
+    lvm-rmsnap "$vg/$lv.$Otag" >/dev/null
+    ;;
+
+  *)
+    echo >&2 "$quis: unknown operation \`$Oop'"
+    exit 1
+    ;;
+
+esac
+
+###----- That's all, folks --------------------------------------------------
diff --git a/snap.lvm.8 b/snap.lvm.8
new file mode 100644
index 0000000..52b906a
--- /dev/null
+++ b/snap.lvm.8
@@ -0,0 +1,104 @@
+.TH snap.lvm 8 "6 November 2011" "distorted.org.uk backup"
+.SH NAME
+snap.lvm \- snapshot handler for LVM logical volumes
+.SH SYNOPSIS
+.B snap.lvm
+.IB vg-name / lv-name
+.IB key = value
+\&...
+.SH DESCRIPTION
+This is a snapshot handler for LVM volumes: see
+.BR snap (8)
+for more information about how snapshot handlers fit into the overall
+system.  It creates or removes a snapshot of the logical volume named
+.I lv-name
+on the volume group
+.IR vg-name .
+.PP
+The following option keys are recognized, either with a
+.RB ` .lvm '
+suffix or without.  Other keys are reported as errors, unless the key
+contains a
+.RB ` . '
+character.
+.TP
+.BI op= op
+The
+.I op
+must be either
+.B snap
+(the default) to create a snapshot, or
+.B unsnap
+to remove it.
+.TP
+.BI tag= tag
+The snapshot volume will be named
+.IB lv-name . tag \fR.
+The default tag is
+.BR snap .
+It is recommended that scripts always provide a tag identifying their
+purpose.
+.TP
+.BI snapsz= sz
+The size to allocate for the snapshot's backing store.  This may be an
+absolute size followed by an optional unit suffix
+.RB ` K "', `" M ',
+.RB ` G ',
+.RB ` T ',
+.RB ` P ',
+or
+.RB ` E '
+for kilobytes, megabytes (the default), gigabytes, terabytes,
+petabytes, or exabytes, respectively; or a relative size of the form
+.IB n % what \fR,
+where
+.I what
+is
+.BR VG ,
+.BR FREE ,
+or
+.BR ORIGIN ,
+to request
+.IR n %
+of the total space in the volume group, the free space remaining in the
+volume group, or the size of the origin volume, respectively.  (The
+.I what
+suffix may be abbreviated to just the first letter; the suffixes are not
+case-sensitive in either case.)
+.SH BUGS
+The LVM tools are rather buggy, and exhibit nasty races with
+.BR udev (8).
+The
+.BR lvremove (8)
+tool is particularly awful.
+The
+.B snap.lvm
+handler uses
+.BR lvm-rmsnap (8)
+to work around these bugs.
+.PP
+If you make a snapshot of a logical volume while a filesystem is
+directly mounted from the logical volume on the same machine, and the
+filesystem kernel driver implements the
+.B freeze_fs
+and
+.B unfreeze_fs
+superblock operations, then the kernel will arrange for the snapshot to
+contain a consistent and clean snapshot of the filesystem \(en in
+particular, it shouldn't need the ministrations of
+.BR fsck (8).
+If the filesystem is mounted by a different machine, e.g., a guest
+running on the same host, or via a network block-device access protocol,
+you will need to negotiate with the remote machine in order to obtain a
+clean snapshot.  See
+.BR snap.rfreezefs (8)
+for a snapshot handler which copes with this, and
+.BR rfreezefs (8)
+for the actual machinery.
+.SH SEE ALSO
+.BR snap (8),
+.BR lvm (8),
+.BR lvm-rmsnap (8),
+.BR rfreezefs (8).
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
diff --git a/snap.rfreezefs.8 b/snap.rfreezefs.8
new file mode 100644
index 0000000..28afa54
--- /dev/null
+++ b/snap.rfreezefs.8
@@ -0,0 +1,76 @@
+.TH snap.rfreezefs 8 "28 November 2011" "distorted.org.uk backup"
+.SH NAME
+snap.rfreezefs \- snapshot handler for remotely mounted filesystems
+.SH SYNOPSIS
+.B snap.rfreezefs
+.I device
+.IB key = value
+\&...
+.SH DESCRIPTION
+This is a snapshot handler for remotely mounted filesystems; i.e.,
+filesystems mounted by some other machine which are granted direct
+block-level access to the volume.
+See
+.BR snap (8)
+for more information about how snapshot handlers fit into the overall
+system.
+It uses
+.BR rfreezefs (8)
+to negotiate with the remote host and ensure a consistent snapshot; it
+uses another snapshot handler, identified by the
+.B subtype
+option, to actually acquire the snapshot.
+.PP
+The following options are recognized, either with a
+.RB ` .rfreezefs '
+suffix or without.  Other keys are passed on to the subtype handler.
+.TP
+.BI host=\fR[ user @\fR] hostname
+The hostname or IP address of the remote host, optionally with user
+name.  This name is passed to
+.BR ssh (1)
+in order to run
+.BR rfreezefs (8)
+on the remote host, but it must actually resolve to an address since
+it's also used to connect to the remote host as part of the
+.B rfreezefs
+synchronization protocol.  This option must be specified.
+.TP
+.BI dir= mountpt
+The mount point of the filesystem on the remote host.  This option must be specified.
+.TP
+.BI op= op
+The
+.IR op " must be either"
+.B snap
+(the default) to create a snapshot, or
+.B unsnap
+to remove it.
+.TP
+.BI rfreezefs= path
+The path to the
+.BR rfreezefs (8)
+binary on the remote host.  By default, the unqualified name
+.B rfreezefs
+will be used, and the remote shell will search the directories named in
+the
+.B PATH
+environment variable.  In fact, the
+.I path
+can be any shell syntax.
+.TP
+.BI ssh= path
+The path to the
+.BR ssh (1)
+program, or equivalent.  By default, the unqualified name
+.B ssh
+is used, and the directories named in the
+.B PATH
+environment variable will be searched.
+.TP
+.BI subtype= type
+The snapshot type to use to actually take the snapshot.  This option
+must be specified.
+.SH SEE ALSO
+.BR snap (8),
+.BR rfreezefs (8).
diff --git a/snap.rfreezefs.in b/snap.rfreezefs.in
new file mode 100755
index 0000000..085ae93
--- /dev/null
+++ b/snap.rfreezefs.in
@@ -0,0 +1,174 @@
+#! @PERL@
+### -*-perl-*-
+###
+### Synchronize snapshot with remotely mounted filesystem
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+use Socket;
+
+###--------------------------------------------------------------------------
+### Utilities.
+
+(our $QUIS = $0) =~ s:^.*/::;		# program name, less any directory part
+sub whine ($) { my ($msg) = @_; print STDERR "$QUIS: $msg\n"; }	# report MSG
+sub fail ($) { my ($msg) = @_; whine $msg; exit $! || ($? >> 8) || 255; }	# report MSG; exit nonzero
+
+our @CLEANUP = ();	# handlers to run at exit, latest registration first
+sub cleanup (&) { my ($func) = @_; unshift @CLEANUP, $func; }	# register FUNC
+END { local $? = $?; for my $func (@CLEANUP) { &$func } }  # handlers can read $?, not change the exit status
+
+sub gripelist ($@) {
+  my $gripe = shift;		# the rest of @_ is the list of offenders
+  @_ and fail "$gripe: " . join(", ", @_);	# nothing to do if there are none
+}
+
+###--------------------------------------------------------------------------
+### Parse command line.
+
+our $USAGE = "usage: $QUIS DEVICE [KEY=VALUE ...]";
+sub version { print "$QUIS, version 1.0.0\n"; }
+sub help {
+  print <<EOF;
+$USAGE
+
+Option keys:
+  dir=MOUNTPT		Mount point of filesystem on remote host [required].
+  host=[USER@]NAME	Name or address of remote host [required].
+  op=OPERATION		`snap' to create snapshot, or `unsnap' to remove.
+  rfreezefs=PATH	Location of `rfreezefs' program on remote host.
+  ssh=PATH		Location of remote-shell program on local host.
+  subtype=TYPE		Type of snapshot to create [required].
+
+Other option keys are passed to the underlying snapshot TYPE.
+EOF
+}
+unless (@ARGV) { print STDERR $USAGE, "\n"; exit 1; }	# no DEVICE given
+if ($ARGV[0] eq "-v" || $ARGV[0] eq "--version") { version; exit; }
+if ($ARGV[0] eq "-h" || $ARGV[0] eq "--help") { version; help; exit; }
+
+our $DEV = shift;
+our %OPT = ( dir => undef,		# undefined here means `required'
+	     host => undef,
+	     op => "snap",
+	     rfreezefs => "rfreezefs",
+	     ssh => "ssh",
+	     subtype => undef );
+our @PASS = ();				# settings for the subtype handler
+
+## Keep the settings meant for us, with or without a `.rfreezefs' suffix,
+## and save all of the others to pass on.
+foreach my $arg (@ARGV) {
+  my ($k, $v) = $arg =~ /^([^\s=]+)=(.*)$/
+    or fail "malformed option `$arg'";
+  $k = $1 if $k =~ /^([^.]+)\.(.+)$/ && $2 eq "rfreezefs";
+  if (exists $OPT{$k}) { $OPT{$k} = $v; } else { push @PASS, $arg; }
+}
+gripelist "missing arguments", grep { !defined $OPT{$_} } keys %OPT;
+
+## Resolve the host now, without the user part; we connect to it later.
+(my $host = $OPT{host}) =~ s/^.*@//;
+my $addr = inet_aton $host or fail "failed to resolve `$OPT{host}'";
+
+###--------------------------------------------------------------------------
+### Remove a snapshot if requested.
+
+if ($OPT{op} eq "unsnap") {
+
+  ## This doesn't require negotiation with the remote end, so simply hand
+  ## the whole job over to the subtype handler; `exec' only returns if it
+  ## failed to start the handler.
+  exec "snap.$OPT{subtype}", $DEV, "op=unsnap", @PASS;
+  fail "exec snap.$OPT{subtype}: $!";
+
+} elsif ($OPT{op} ne "snap") {
+  fail "unknown operation `$OPT{op}'";
+}
+
+###--------------------------------------------------------------------------
+### Run `rfreezefs' on the remote host and collect information.
+
+(my $dir = $OPT{dir}) =~ s/\'/'\\''/g;	# quote for the remote shell
+open SSH, "-|", $OPT{ssh}, $OPT{host}, "$OPT{rfreezefs} -n '$dir'"
+  or fail "open(ssh): $!";
+cleanup { close SSH };
+
+our %INF = ( PORT => undef );		# required items, filled in below
+our %TOK = ();				# maps token label to token
+our %RTOK = ();				# maps token back to its label
+our $PORT = undef;
+
+while (<SSH>) {				# read lines until `READY' or EOF
+  my @f = split;
+  if ($f[0] eq "PORT") { $INF{$f[0]} = $f[1]; }
+  elsif ($f[0] eq "TOKEN") { $TOK{$f[1]} = $f[2]; $RTOK{$f[2]} = $f[1]; }
+  elsif ($f[0] eq "READY") { last; }
+}
+
+gripelist "missing information", grep { !defined $INF{$_} } keys %INF;
+gripelist "missing tokens",
+  grep { !exists $TOK{$_} } "FREEZE", "FROZEN", "THAW", "THAWED";
+
+###--------------------------------------------------------------------------
+### Create the snapshot.
+
+## Connect to the socket, on the port which the remote `rfreezefs' reported.
+socket SK, PF_INET, SOCK_STREAM, 0 or fail "socket: $!";
+cleanup { close SK };
+select((select(SK), $| = 1)[0]);	# unbuffer SK; reselect the old handle
+connect SK, sockaddr_in($INF{PORT}, $addr) or fail "connect: $!";
+
+## Send the token for CMD to the server; if RPL is given, wait for a reply
+## line and fail unless it is RPL's token.
+sub rffscmd ($;$) {
+  my ($cmd, $rpl) = @_;
+  print SK $TOK{$cmd}, "\n" or fail "write <$cmd>: $!";
+  return unless $rpl;
+  chomp (my $line = <SK>);
+  return if $line eq $TOK{$rpl};
+  ## Name the response if it's a token we know; otherwise quote it.
+  my $what = exists $RTOK{$line} ? "<$RTOK{$line}>" : "`$line'";
+  fail "unexpected response $what to <$cmd>";
+}
+
+## Freeze the remote filesystem.
+rffscmd("FREEZE", "FROZEN");
+
+## Create the snapshot locally using the appropriate mechanism.  This will
+## print the snapshot device name.
+my $rc = system "snap.$OPT{subtype}", $DEV, @PASS;
+$rc and fail "snap.$OPT{subtype} failed (rc = $rc)";
+
+## Discard the snapshot again if we end up exiting with an error.
+cleanup {
+  if ($?) {
+    my $rc = system "snap.$OPT{subtype}", $DEV, "op=unsnap", @PASS;
+    $rc and
+      whine "snap.$OPT{subtype} failed to unsnap (rc = $rc) " .
+	"while recovering";
+  }
+};
+
+## Thaw the remote filesystem.
+rffscmd("THAW", "THAWED");
+
+###----- That's all, folks --------------------------------------------------
+
+exit 0;
diff --git a/snap.ro b/snap.ro
new file mode 100755
index 0000000..30a7806
--- /dev/null
+++ b/snap.ro
@@ -0,0 +1,133 @@
+#! /bin/sh
+###
+### Make fake snapshots by remounting a filesystem readonly
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+set -e
+quis=${0##*/}
+
+###--------------------------------------------------------------------------
+### Parse the command line.
+
+## Provide help or version information.
+usage="usage: $quis DEVICE [KEY=VALUE ...]"
+version="$quis, version 1.0.0"
+case "$#,$1" in
+  0,*) echo >&2 "$usage"; exit 1 ;;
+  *,-v | *,--version) echo "$version"; exit ;;
+  *,-h | *,--help)
+    cat <<EOF
+$version
+$usage
+
+Option keys:
+  dir=MOUNTPT		Mount point of filesystem.
+  op=OPERATION		\`snap' to create snapshot, or \`unsnap' to remove.
+  tag=TAG		Disambiguation tag to store in filesystem.
+EOF
+    exit
+    ;;
+esac
+
+## Scan the option keys.
+dev=$1; shift
+Oop=snap Otag=snap
+win=t
+for i in "$@"; do
+  case "$i" in
+    ?*=*) ;;
+    *) echo >&2 "$quis: malformed option \`$i'"; exit 1 ;;
+  esac
+  k=${i%%=*} v=${i#*=}
+  case "$k" in *.ro) k=${k%.ro} ;; ?*.?*) continue ;; esac
+  case "$k" in
+    op | tag | dir) eval "O$k=\$v" ;;
+    *) echo >&2 "$quis: unknown option \`$k'"; win=nil ;;
+  esac
+done
+case $win in nil) exit 1 ;; esac
+
+## Find a mount point if none was given, by matching device numbers.
+sdn='s/^b[^ ]*  *[^ ]*  *[^ ]*  *[^ ]*  *\([0-9]*\),  *\([0-9]*\) .*$/\1:\2/'
+case "${Odir+t}" in
+  t) ;;
+  *)
+    exec 3</etc/mtab			# read the mount table on fd 3
+    devno=$(ls -lL "/dev/$dev" | sed -n "${sdn}p")	# empty unless block dev
+    case "$devno" in
+      *:*) ;;
+      *) echo >&2 "$quis: $dev is not a block device"; exit 1 ;;
+    esac
+    while read <&3 d m fs hunoz; do
+      if [ ! -b "$d" ]; then continue; fi
+      dn=$(ls -lL "$d" | sed -n "${sdn}p")
+      case "$dn" in
+	"$devno")
+	  case "${Odir+t}" in
+	    t) echo >&2 "$quis: /dev/$dev mounted multiple times"; exit 1 ;;
+	  esac
+	  Odir=$m			# remember where it's mounted
+	  ;;
+      esac
+    done
+    exec 3>&-
+    ;;
+esac
+case "${Odir+t}" in
+  t) ;; *) echo >&2 "$quis: /dev/$dev apparently not mounted"; exit 1 ;;
+esac
+
+###--------------------------------------------------------------------------
+### Take or remove the snapshot.
+
+case "$Oop" in
+
+  snap)
+    ## Record our tag while the filesystem is writable; then make it so no more.
+    echo "$Otag" >"$Odir/.snap"
+    mount -oremount,ro "/dev/$dev" "$Odir"
+    echo "$dev"
+    ;;
+
+  unsnap)
+    ## Only make the filesystem writable again if the tag recorded in it is
+    ## the one we were given.
+    tagfile=$Odir/.snap
+    if [ ! -f "$tagfile" ]; then
+      echo >&2 "$quis: no snapshot tag"; exit 1
+    fi
+    read tag <"$tagfile"
+    if [ "x$tag" != "x$Otag" ]; then
+      echo >&2 "$quis: tag mismatch (found \`$tag' but expected \`$Otag')"
+      exit 1
+    fi
+    mount -oremount,rw "/dev/$dev" "$Odir"
+    rm "$tagfile"
+    ;;
+
+  *)
+    echo >&2 "$quis: unknown operation \`$Oop'"
+    exit 1
+    ;;
+
+esac
+
+###----- That's all, folks --------------------------------------------------
diff --git a/snap.ro.8 b/snap.ro.8
new file mode 100644
index 0000000..cf8349c
--- /dev/null
+++ b/snap.ro.8
@@ -0,0 +1,69 @@
+.TH snap.ro 8 "12 November 2011" "distorted.org.uk backup"
+.SH NAME
+snap.ro \- fake snapshot handler which remounts filesystems read-only
+.SH SYNOPSIS
+.B snap.ro
+.I device
+.IB key = value
+\&...
+.SH DESCRIPTION
+This is a fake snapshot handler.  Rather than produce a snapshot device
+containing a consistent view of the origin's contents, it simply
+remounts the contained filesystem read-only.  Obviously, this isn't
+satisfactory for operations which might alter the block device, but it's
+fine for stuff like backups.
+.PP
+The following option keys are recognized, either with a
+.RB ` .ro '
+suffix or without.  Other keys are reported as errors, unless the key
+contains a
+.RB ` . '
+character.
+.TP
+.BI op= op
+The
+.I op
+must be either
+.B snap
+(the default) to create a `snapshot' \(en i.e., remount read-only \(en or
+.B unsnap
+to remove it \(en i.e., remount read/write.
+.TP
+.BI tag= tag
+On snapshot creation, record the
+.I tag
+in the filesystem before remounting readonly; on snapshot removal,
+verify that the tag recorded matches
+.IR tag .
+This prevents a different script from making the filesystem read/write
+prematurely.  The default tag is
+.BR snap .
+.TP
+.BI dir= mount-pt
+The filesystem is mounted on \fImount-pt\fR.
+If you omit this option,
+.B snap.ro
+will search
+.B /etc/mtab
+for a mount point for the given
+.IR device .
+If exactly one such mount point is found then it will be used;
+otherwise, an error is reported.
+.SH BUGS
+This assumes that the filesystem is mounted locally. If the block device
+is exported (e.g., to a virtual guest, or using a network block device)
+then this won't work and you'll need to do something more complicated.
+.PP
+Linux allows the same device to be mounted in several different places
+using bind mounts.  Each of these occurrences needs to be made read-only
+if the device contents are to be made consistent;
+.BR snap.ro
+is too stupid to handle this properly: instead, if
+.B dir
+is omitted, it checks that the device is mentioned exactly once in
+.BR /etc/mtab .
+.SH SEE ALSO
+.BR snap (8),
+.BR mount (8).
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
diff --git a/snap.trivial b/snap.trivial
new file mode 100755
index 0000000..f33bb05
--- /dev/null
+++ b/snap.trivial
@@ -0,0 +1,82 @@
+#! /bin/sh
+###
+### Make fake snapshots by doing nothing at all
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+set -e
+quis=${0##*/}
+
+###--------------------------------------------------------------------------
+### Parse the command line.
+
+## Provide help or version information; complain if no DEVICE was given.
+usage="usage: $quis DEVICE [KEY=VALUE ...]"
+version="$quis, version 1.0.0"
+case "$#,$1" in
+  0,*) echo >&2 "$usage"; exit 1 ;;
+  *,-v | *,--version) echo "$version"; exit ;;
+  *,-h | *,--help)
+    cat <<EOF
+$version
+$usage
+
+Option keys:
+  op=OPERATION		\`snap' to create snapshot, or \`unsnap' to remove.
+  tag=TAG		Accepted for compatibility, and ignored.
+EOF
+    exit
+    ;;
+esac
+
+## Scan the option keys.
+dev=$1; shift
+Oop=snap Otag=snap
+win=t
+for i in "$@"; do
+  case "$i" in
+    ?*=*) ;;
+    *) echo >&2 "$quis: malformed option \`$i'"; exit 1 ;;
+  esac
+  k=${i%%=*} v=${i#*=}
+  case "$k" in *.trivial) k=${k%.trivial} ;; ?*.?*) continue ;; esac
+  case "$k" in
+    op | tag) eval "O$k=\$v" ;;
+    *) echo >&2 "$quis: unknown option \`$k'"; win=nil ;;
+  esac
+done
+case $win in nil) exit 1 ;; esac
+
+###--------------------------------------------------------------------------
+### Take or remove the snapshot.
+
+case "$Oop" in
+  snap)
+    echo "$dev"
+    ;;
+  unsnap)
+    ;;
+  *)
+    echo >&2 "$quis: unknown operation \`$Oop'"
+    exit 1
+    ;;
+esac
+
+###----- That's all, folks --------------------------------------------------
diff --git a/snap.trivial.8 b/snap.trivial.8
new file mode 100644
index 0000000..2dced58
--- /dev/null
+++ b/snap.trivial.8
@@ -0,0 +1,39 @@
+.TH snap.trivial 8 "12 November 2011" "distorted.org.uk backup"
+.SH NAME
+snap.trivial \- fake snapshot handler which does nothing at all
+.SH SYNOPSIS
+.B snap.trivial
+.I device
+.IB key = value
+\&...
+.SH DESCRIPTION
+This is a fake snapshot handler.  Rather than produce a snapshot device
+containing a consistent view of the origin's contents, it does nothing
+at all.
+.PP
+The following option keys are recognized, either with a
+.RB ` .trivial '
+suffix or without.  Other keys are reported as errors, unless the key
+contains a
+.RB ` . '
+character.
+.TP
+.BI op= op
+The
+.I op
+must be either
+.B snap
+(the default) or
+.BR unsnap .
+In the former case, the
+.I device
+name is printed to standard output; in neither case does
+.B snap.trivial
+actually do anything substantive.
+.TP
+.BI tag= tag
+This option is accepted and ignored.
+.SH SEE ALSO
+.BR snap (8).
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
diff --git a/snaptab.5.in b/snaptab.5.in
new file mode 100644
index 0000000..8a31dd5
--- /dev/null
+++ b/snaptab.5.in
@@ -0,0 +1,305 @@
+.ie t .ds o \(bu
+.el .ds o o
+.de hP
+.IP
+\h'-\w'\fB\\$1\ \fP'u'\fB\\$1\ \fP\c
+..
+.TH snaptab 5 "6 November 2011" "distorted.org.uk backup"
+.SH NAME
+snaptab \- device-level snapshot parameters
+.SH DESCRIPTION
+The
+.B snaptab
+file describes mechanisms and parameters for taking snapshots of block
+devices.  The
+.BR snap (8)
+program by default reads
+.BR @sysconfdir@/snaptab ,
+but it can be instructed to read a different file using the
+.B \-c
+command-line option.
+.SS Syntax
+The file consists of a sequence of logical lines.  A logical line
+consists of one or more newline-terminated physical lines, all but the
+last of which ends with a backslash
+.RB ` \e '
+followed by zero or more whitespace characters.  These trailing
+backslashes and trailing whitespace are stripped from the logical line
+during parsing, and are not further significant.
+.PP
+A logical line may be
+.IR empty ,
+consisting only of whitespace;
+a
+.IR comment ,
+beginning with zero or more whitespace characters followed by a
+.RB ` # ';
+or a
+.IR "data line" .
+Empty lines and comments are ignored.
+.PP
+A data line consists of two or more whitespace-separated fields.  There
+may be leading or trailing whitespace; this is not significant.
+Whitespace characters may be included in a field using escaping or
+quoting, in a manner similar to the Bourne shell, though it is not
+possible for a field to contain a newline character.
+.PP
+The precise quoting and escaping rules are as follows.  All characters
+other than whitespace (i.e., space, horizontal and vertical tabs, and
+formfeed), the backslash
+.RB ` \e ',
+the single-quote
+.RB ` ' ',
+and the double-quote
+.RB ` """" ',
+are
+.I ordinary
+and have no special meaning.  A character preceded by an unescaped
+backslash is
+.IR escaped ;
+any other character is
+.IR unescaped .
+If an unquoted, unescaped double- or single-quote character occurs, the
+following characters, up until but not including the next unescaped
+occurrence of the same quote character, are
+.I double-
+or
+.I single-quoted
+respectively; other characters are
+.IR unquoted .
+A character is
+.I constituent
+if it is ordinary, escaped, single-quoted, or both double-quoted and not
+a backslash.  A field's text consists of all of the constituent
+characters bounded by the start of the line, the end of the line, or
+sequences of one or more unescaped, unquoted whitespace characters.
+.PP
+The fields of a data line are as follows.
+.IP
+.I "device	type		option \fR..."
+Briefly, the fields are interpreted as follows.  A full description is
+given below.
+.TP
+.I device
+The name of a device which can be snapshot.  This is usually the name of
+a device node in
+.B /dev
+without the
+.RB ` /dev '
+prefix, though strictly speaking its interpretation is left up to the
+snapshot handler program.  Alternatively, this can be the special name
+.RB ` * '
+to define default options for a particular snapshot type.
+.TP
+.I type
+The type of snapshot to be performed.  For each snapshot type, there is
+a program
+.BI snap. type
+which manages snapshots of that type.
+.TP
+.IB option
+The remaining fields are option assignments: these are either individual
+.IB key = value
+assignments, or tokens of the form
+.BI *. type
+to interpolate the current default options for the given
+.IR type .
+.SS Interpretation
+The configuration file's data lines describe a mapping from
+.I device
+names to
+.IR type s
+and sequences of
+.IR options .
+This section describes how the mapping is derived from the file
+contents.
+.PP
+Abstractly,
+.IR device s
+and
+.IR type s
+are arbitrary strings, and
+.IR option s
+are strings of the form
+.IB key = value \fR,
+where the
+.I key
+is a nonempty string which contains no
+.RB ` = '
+characters, and the
+.I value
+is an arbitrary string.  An option of the form
+.IB key . type = value
+is said to be
+.IR qualified ;
+otherwise, an option is
+.IR unqualified .
+In a qualified option, the
+.I type
+may not contain a
+.RB ` . '
+character.
+.PP
+A data line's
+.I option
+sequence may contain
+.IB key = value
+assignments, which are not further interpreted, and various special
+tokens.  These tokens are replaced with other options or sequences of
+options during
+.IR expansion .
+The following tokens are recognized during expansion, and replaced.
+.TP
+.BI *. type
+The replacement sequence is constructed as follows.  Start with the
+default option sequence for the
+.I type
+for this line (see below).  Each qualified option in this sequence is
+retained; each unqualified option
+.IB key = value
+is qualified by
+.I type
+yielding
+.IB key . type = value \fR.
+.PP
+The
+.I "default option sequence"
+for a particular
+.I type
+.I t
+at a given line in the file is the concatenation of the expanded option
+sequences of all earlier data lines whose
+.I device
+field is
+.B *
+and whose
+.I type
+is
+.IR t .
+.PP
+The option sequence of a data line whose
+.I device
+is not
+.B *
+is
+.I augmented
+by prefixing the expanded option sequence with the default option
+sequence for the corresponding
+.I type
+at that line.
+.PP
+It is an error if two data lines with equal
+.I device
+fields have different
+.I type
+fields, unless the
+.I device
+is
+.BR * .
+.PP
+The mapping from
+.I device
+names to
+.IR type s
+and
+.I option
+sequences can now be defined.  Collect all of the data lines whose
+.I device
+field matches the input
+.I device
+name.  The common
+.I type
+field from these lines is the resulting
+.IR type ;
+the resulting option sequence is the concatenation of the augmented
+option sequences for the collected lines.
+.PP
+Note that the
+.BR snap (8)
+program filters the option sequence, retaining only the last option with
+a given
+.IR key .
+.SH EXAMPLE
+Consider the file below.
+.IP
+.nf
+.ft B
+.ta 8 16 24 32 40 48 56 64
+## Set defaults.
+* lvm \e
+	snapsz=5%ORIGIN
+
+* rfreezefs \e
+	subtype=lvm \e
+	*.lvm
+
+## Local filesystems.
+md/boot	     		ro		dir=/boot
+vg-ibanez/root		lvm
+vg-ibanez/home		lvm
+vg-ibanez/scratch	lvm
+vg-ibanez/usr		lvm
+vg-ibanez/var		lvm
+
+## Remotely mounted filesystems.
+*			rfreezefs	host=roadstar
+vg-ibanez/db		rfreezefs	dir=/mnt/db
+vg-ibanez/ftp		rfreezefs	dir=/mnt/ftp
+vg-ibanez/news		rfreezefs	dir=/var/spool/news
+
+*			rfreezefs	host=jem
+vg-ibanez/homes		rfreezefs	dir=/home
+vg-ibanez/jb		rfreezefs	dir=/mnt/jb
+.fi
+.ft P
+.PP
+Now:
+.hP \*o
+At the
+.B md/boot
+line, the default option list for type
+.B lvm
+is
+.B snapsz=5%ORIGIN
+and the default option list for type
+.B rfreezefs
+is
+.B subtype=lvm
+.BR snapsz.lvm=5%ORIGIN .
+.hP \*o
+The device
+.B vg-ibanez/scratch
+has type
+.B lvm
+and option list
+.BR snapsz=5%ORIGIN .
+.hP \*o
+The device
+.B vg-ibanez/db
+maps to type 
+.B rfreezefs
+and option list
+.B subtype=lvm
+.B snapsz.lvm=5%ORIGIN
+.B host=roadstar
+.BR dir=/mnt/db .
+.hP \*o
+The device
+.B vg-ibanez/jb
+maps to type
+.B rfreezefs
+and option list
+.B subtype=lvm
+.B snapsz.lvm=5%ORIGIN
+.B host=roadstar
+.B host=jem
+.BR dir=/mnt/jb ;
+but only the latter
+.B host
+option is significant \(en the other will be omitted from the list
+passed to
+.BR snap.rfreezefs (8).
+.SH SEE ALSO
+.BR snap (8)
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
-- 
2.11.0