0a4616608db7311cdb162534d86926ab15f6b941
[distorted-backup] / lvm-rmsnap.in
1 #! @PERL@
2 ### -*-perl-*-
3 ###
4 ### Remove an LVM snapshot, without falling foul of LVM bugs
5 ###
6 ### (c) 2011 Mark Wooding
7 ###
8
9 ###----- Licensing notice ---------------------------------------------------
10 ###
11 ### This program is free software; you can redistribute it and/or modify
12 ### it under the terms of the GNU General Public License as published by
13 ### the Free Software Foundation; either version 2 of the License, or
14 ### (at your option) any later version.
15 ###
16 ### This program is distributed in the hope that it will be useful,
17 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ### GNU General Public License for more details.
20 ###
21 ### You should have received a copy of the GNU General Public License
22 ### along with this program; if not, write to the Free Software Foundation,
23 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24
25 use Cwd qw(realpath);
26 use Errno qw(:POSIX);
27 use Fcntl qw(:mode);
28 use File::stat;
29 use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
30 use IO::Handle;
31 use Time::HiRes qw(time);
32
33 our $VERSION = "@VERSION@";
34
35 ###--------------------------------------------------------------------------
36 ### Utilities.
37
## Error handling and reporting.
our $QUIS = $0;
$QUIS =~ s:^.*/::;
our $DEBUG = 0;

sub whine ($) {
  ## whine MSG
  ##
  ## Write MSG to stderr as a single line, prefixed by the program name.

  my ($text) = @_;
  printf STDERR "%s: %s\n", $QUIS, $text;
}

sub burble ($) {
  ## burble MSG
  ##
  ## Report MSG through `whine', but only if debugging is switched on.

  my ($text) = @_;
  $DEBUG and whine $text;
}

sub fail ($) {
  ## fail MSG
  ##
  ## Report MSG and exit.  The exit status is the current `$!' if that is
  ## nonzero; failing that, the exit code recorded in `$?'; failing that,
  ## 255.

  my ($text) = @_;
  whine $text;
  exit($! || ($? >> 8) || 255);
}
44
## Cleanups.  Call `cleanup BLOCK' to arrange to have BLOCK executed at the
## end of the program.  Each new cleanup goes on the front of the list, so
## they run most-recent-first.  They run from the END block, and also when
## SIGINT or SIGTERM arrives, after which the signal is re-raised with its
## default disposition.
our @CLEANUP = ();

sub runcleanups {
  foreach my $hook (@CLEANUP) { $hook->(); }
}

END { runcleanups; }

sub cleanup (&) {
  my ($hook) = @_;
  unshift @CLEANUP, $hook;
}

for my $signame (qw(INT TERM)) {
  $SIG{$signame} = sub {
    my ($sig) = @_;
    runcleanups;
    $SIG{$sig} = 'DEFAULT';
    kill $sig => $$;
  };
}
57
sub fixint ($) {
  ## fixint STRING
  ##
  ## Convert STRING to a number.  Anything with a leading zero is handed to
  ## `oct' (which understands the `0x', `0b' and plain octal forms);
  ## anything else is converted as an ordinary decimal number.

  my ($text) = @_;
  return $text + 0 unless $text =~ /^0/;
  return oct $text;
}
59
60 ###--------------------------------------------------------------------------
61 ### Device fiddling.
62
sub devsys ($) {
  ## devsys DEV
  ##
  ## Return a sysfs path for a device DEV: the resolved name of the
  ## `/sys/dev/KIND/MAJ:MIN' link with the leading `/sys' stripped off.
  ## Fails if DEV can't be examined, isn't a block or character device, or
  ## its sysfs link can't be resolved.

  my ($dev) = @_;
  my $st = stat $dev or fail "stat ($dev): $!";
  my $kind;
  if (S_ISBLK($st->mode)) { $kind = "block"; }
  elsif (S_ISCHR($st->mode)) { $kind = "char"; }
  else { fail "$dev is not a device"; }

  ## Pick the device number apart using the Linux/glibc `dev_t' layout.
  ## The low 16 bits hold eight bits each of major and minor; the remaining
  ## major and minor bits live higher up.  Masking with 0xff alone gives
  ## the wrong answer for minor numbers above 255.
  my $rdev = $st->rdev;
  my $maj = (($rdev >> 8) & 0xfff) | (($rdev >> 32) & 0xfffff000);
  my $min = ($rdev & 0xff) | (($rdev >> 12) & 0xffffff00);

  ## Resolve the sysfs link.  Check the result rather than quietly handing
  ## an undefined path back to the caller.
  my $link = "/sys/dev/$kind/$maj:$min";
  defined (my $whole = realpath $link) or fail "realpath ($link): $!";
  $whole =~ s:^/sys/:/:;
  return $whole;
}
78
## The device-mapper table, as last read by `dmtable_update'.  Maps each
## device name to a list of table lines, each of which is a list of fields.
our %DMTAB = ();

sub dmtable_update () {
  ## dmtable_update
  ##
  ## Throw away the contents of %DMTAB and rebuild it from the output of
  ## `dmsetup table'.  The first field of each output line (ending at a run
  ## of colons and whitespace) is the device name; the rest is split on
  ## whitespace.  Fails if `dmsetup' can't be started or reports an error.

  burble "re-read device-mapper table";
  %DMTAB = ();
  open my $pipe, "-|", qw(dmsetup table) or fail "open (dm table): $!";
  for (;;) {
    my $row = $pipe->getline or last;
    my ($name, $spec) = split /[:\s]+/, $row, 2;
    my @fields = split ' ', $spec;
    push @{$DMTAB{$name}}, \@fields;
  }
  close $pipe or fail "dmsetup table failed (rc = $?)";
}
95
sub dmname ($) {
  ## dmname SYSPATH
  ##
  ## Return the device-mapper node name for the sysfs path SYSPATH (as
  ## returned by `devsys', i.e., without the `/sys' prefix).  The name is
  ## the first line of the device's `dm/name' attribute; fails if that
  ## can't be opened.

  my ($sys) = @_;
  my $attr = "/sys$sys/dm/name";
  open my $fh, "<", $attr or fail "open ($sys/dm/name): $!";
  my $name = $fh->getline;
  chomp $name;
  close $fh;
  return $name;
}
107
108 ###--------------------------------------------------------------------------
109 ### I/O utilities.
110
sub sel ($;$$$) {
  ## sel TIMEOUT, [READS, WRITES, EXCEPTIONS]
  ##
  ## Wait for at most TIMEOUT seconds (indefinitely if TIMEOUT is `undef').
  ## Each of READS, WRITES and EXCEPTIONS is a listref containing FILE => SUB
  ## pairs: if the FILE is readable (writable, has an exceptional condition)
  ## then the SUB is invoked, with no arguments.
  ##
  ## If the wait is interrupted by a signal then return without calling
  ## anything: callers are expected to loop.  Any other failure is fatal.

  my ($t, $r, $w, $x) = @_;
  my ($vr, $vw, $vx);
  my (%r, %w, %x);

  ## Read the arguments and build a data structure: for each kind of
  ## condition, a bit vector for select(2) and a map from descriptor number
  ## to handler.
  for my $i ([$r, \$vr, \%r], [$w, \$vw, \%w], [$x, \$vx, \%x]) {
    my ($a, $v, $h) = @$i;
    next unless $a;
    my @a = @$a;
    while (@a) {
      my ($f, $g) = splice @a, 0, 2;
      my $fd = $f->fileno;
      $h->{$fd} = $g;
      vec($$v, $fd, 1) = 1;
    }
  }

  ## Do the wait.  Failure is reported as -1, not `undef', and after a
  ## failure the bit vectors can't be trusted, so don't look at them.
  my $n = select $vr, $vw, $vx, $t;
  if ($n < 0) {
    return if $!{EINTR};
    fail("select: $!");
  }

  ## Sift through the results.  Collect the handlers first so that nothing
  ## a handler does can upset the iteration.
  my @ready = ();
  for my $i ([$vr, \%r], [$vw, \%w], [$vx, \%x]) {
    my ($v, $h) = @$i;
    for my $fd (keys %$h) {
      push @ready, $h->{$fd} if vec $v, $fd, 1;
    }
  }
  $_->() for @ready;
}
145
sub doread ($;$) {
  ## doread FILE, [LEN]
  ##
  ## Read up to LEN bytes (4096 if LEN is omitted) from FILE with a single
  ## `sysread'.  Return `undef' at end of file, an empty string if reading
  ## would block (EAGAIN), and otherwise the data read.  Any other error is
  ## fatal.

  my ($f, $n) = @_;
  my $buf;
  my $got = sysread $f, $buf, $n // 4096;
  return $buf if $got;
  return undef if defined $got;
  return "" if $! == EAGAIN;
  fail "read: $!";
}
159
sub run ($$@) {
  ## run WHAT, PROG, ARGS...
  ##
  ## Run PROG, passing it ARGS, and wait for it to finish.  WHAT is a short
  ## description of the purpose, used only in the error message.  Fails if
  ## PROG doesn't exit with status zero.

  my ($what, $prog, @args) = @_;
  my $status = system $prog, @args;
  return 1 if $status == 0;
  fail "$prog ($what) failed (rc = $?)";
}
168
sub capture ($@) {
  ## capture PROG, ARGS...
  ##
  ## Run PROG, passing it ARGS.  Returns the wait status (as left in `$?'),
  ## and everything PROG wrote to stdout and to stderr, as strings.  If PROG
  ## can't be started, the child reports why on its stderr and exits with
  ## status 127.

  my ($prog, @args) = @_;
  my ($out, $err) = ("", "");
  my ($outpipe_in, $outpipe_out, $errpipe_in, $errpipe_out);
  pipe $outpipe_in, $outpipe_out or fail "pipe ($prog out): $!";
  pipe $errpipe_in, $errpipe_out or fail "pipe ($prog err): $!";
  defined (my $kid = fork) or fail "fork ($prog): $!";
  if ($kid == 0) {
    ## We're the child.  If something goes wrong, we mustn't use `fail':
    ## that calls `exit', which runs the END block, and so the cleanups
    ## registered by the parent would be run here while the parent still
    ## needs the things they destroy.  Use `_exit' to leave without them.
    close $outpipe_in
      and close $errpipe_in
      and open STDOUT, ">&", $outpipe_out
      and open STDERR, ">&", $errpipe_out
      and exec $prog, @args
      or do {
	whine "exec $prog: $!";
	require POSIX;
	POSIX::_exit(127);
      };
  }
  close $outpipe_out;
  close $errpipe_out;

  ## Collect output from both pipes until both have reported end-of-file.
  ## A pipe's handle variable is cleared once it's been closed.
  for (;;) {
    my @r = ();
    for my $i ([\$outpipe_in, \$out, "out"],
	       [\$errpipe_in, \$err, "err"]) {
      my ($p, $b, $w) = @$i;
      push @r, $$p => sub {
	my $buf = doread $$p;
	if (defined $buf) { $$b .= $buf; }
	else { close $$p; $$p = undef; }
      } if $$p;
    }
    last unless @r;
    sel undef, \@r;
  }

  ## Reap the child.  `waitpid' reports failure as -1, which is true, so
  ## compare against the process-id we expect.
  waitpid($kid, 0) == $kid or fail "waitpid ($prog): $!";
  return $?, $out, $err;
}
208
209 ###--------------------------------------------------------------------------
210 ### Monitoring udev events.
211
sub umon_create (@) {
  ## umon_create ARGS...
  ##
  ## Create a udev monitor, with the given `udevadm monitor' arguments, and
  ## return an object: a hashref holding the monitor's process-id (`KID'),
  ## the non-blocking pipe from it (`PIPE'), and any output read but not yet
  ## parsed (`BUF').  We always select only kernel events.  We try to wait
  ## for the monitor to start up before returning, and fail if that takes
  ## longer than five seconds.  Don't trust this: use `umon_sync' anyway.

  my @args = @_;
  my $u = {};

  ## Start the monitor process, unbuffered so that we see events promptly,
  ## and arrange to kill it when we exit.
  $u->{KID} = open($u->{PIPE}, "-|",
		   "stdbuf", "-o0",
		   "udevadm", "monitor", "--kernel", "--property", @args)
    or fail "open (umon): $!";
  cleanup { kill 9, $u->{KID} };
  $u->{PIPE}->blocking(0) or fail "set non-blocking (umon): $!";

  ## Wait for the end of the preamble, indicated by the first blank line.
  ## From observation with strace(1), this means that the monitor has
  ## successfully attached itself to its netlink socket and is ready to fetch
  ## events.
  ##
  ## Everything after the first blank line is kept for `umon_read', however
  ## many lines it spans: hence the `/s' flag, without which a leftover
  ## containing a newline would either fail to match or match at some later
  ## blank line and lose events.
  my $ok = 0;
  my $buf = "";
  my $now = time;
  my $end = $now + 5;
  while (!$ok) {
    sel
      $end - $now,
      [ $u->{PIPE} => sub {
	  defined (my $b = doread $u->{PIPE}) or fail "read (umon): eof";
	  $buf .= $b;
	  if ($buf =~ /\n\n(.*)\z/s) { $ok = 1; $buf = $1; }
	}
      ];

    ## Only complain about the deadline if we're still waiting.
    last if $ok;
    $now = time;
    if ($now >= $end) { fail "umon timeout"; }
  }
  $u->{BUF} = $buf;

  ## Done.
  return $u;
}
256
sub umon_read ($) {
  ## umon_read UMON
  ##
  ## Read whatever events are currently available from UMON, and return
  ## them as a list of hash references mapping properties to their values.
  ## Events are separated by blank lines; lines which don't look like
  ## `NAME=VALUE' are ignored.  A trailing incomplete event is saved in
  ## UMON for next time.  End-of-file on the monitor is fatal.

  my ($u) = @_;
  my @events = ();
  while (1) {
    my $chunk = doread $u->{PIPE};
    defined $chunk or fail "read (umon): end of file";
    last if $chunk eq "";

    ## Split into records, keeping the unfinished tail for later.
    my @recs = split /\n\n/, $u->{BUF} . $chunk, -1;
    $u->{BUF} = pop @recs;
    for my $rec (@recs) {
      my %prop = map { /^(\w+)=(.*)$/ } split /\n/, $rec;
      push @events, \%prop;
    }
  }
  return @events;
}
277
sub umon_sync ($$) {
  ## umon_sync UMON, DEV
  ##
  ## Wait for UMON to report an event about the device DEV (without its
  ## `/dev/' prefix), triggering periodically just in case it missed one.
  ## This is useful for synchronizing.  Returns the list of events which
  ## weren't interesting: everything read other than the matching event
  ## itself.

  my ($u, $dev) = @_;
  my $now = time;
  my $retry = 0;
  my $done = 0;
  my @ev = ();
  burble "sync with udev";

  until ($done) {

    ## Too late.  Trigger a change event and try again.  (This always
    ## happens the first time through.)
    if ($now >= $retry) {
      $retry = $now + 2;
      run "trigger $dev", "udevadm", "trigger", "--sysname-match=$dev";
    }

    ## Now read events and see what happens.
    sel
      $retry - $now,
      [ $u->{PIPE} => sub {
	  my @e = umon_read $u;
	  while (@e) {
	    my $e = shift @e;

	    ## Compare device names with any `/dev/' prefix removed, and
	    ## cope with events which have no DEVNAME at all.
	    ## NOTE(review): newer udev versions appear to report DEVNAME
	    ## as a full device node path; confirm against the udev in use.
	    (my $name = $e->{DEVNAME} // "") =~ s:^/dev/::;
	    if ($name eq $dev) { $done = 1; push @ev, @e; last; }
	    else { push @ev, $e; }
	  }
	}
      ];
    $now = time;
  }

  return @ev;
}
318
319 ###--------------------------------------------------------------------------
320 ### Main code.
321
## Parse the command line.  There must be exactly one non-option argument,
## of the form VGNAME/LVNAME; otherwise print the usage message to stderr
## and exit with status 1.
our $USAGE = "usage: $QUIS VGNAME/LVNAME";
sub version { print "$QUIS, version $VERSION\n"; }
sub help {
  print <<EOF;
$USAGE

Options:
-h, --help Show this help text.
-v, --version Show the program version number.
-d, --debug Show debugging information.
-n, --no-act Don't take corrective actions.
EOF
}

## The help text advertises `--no-act', so accept that spelling; `--noact'
## is kept for compatibility.
our $NOACT = 0;
GetOptions('help|h|?' => sub { version; help; exit; },
	   'version|v' => sub { version; exit; },
	   'debug|d' => \$DEBUG,
	   'no-act|noact|n' => \$NOACT)
  and @ARGV == 1
  and $ARGV[0] =~ m:(.+)/(.+):
  or do { print STDERR $USAGE, "\n"; exit 1; };
our ($VG, $LV) = ($1, $2);
346
## Before we wheel out the big guns, make sure that the volume in question
## really exists, that device-mapper knows about it, and that the first
## line of its table describes a snapshot.
dmtable_update;
our $SYS = devsys "/dev/$VG/$LV";
burble "sysfs name is $SYS";
my $t = $DMTAB{dmname $SYS};
$t or fail "/dev/$VG/$LV isn't a device-mapper device";
if ($DEBUG) {
  burble "found table...";
  for my $row (@$t) { burble "\t" . join " ", @$row; }
}
fail "/dev/$VG/$LV isn't a snapshot" unless $t->[0][2] eq "snapshot";
359
## Create a udev monitor.  We're only interested in disk-shaped block
## devices.  (If we use some other device kind for synchronization then this
## filter will have to be broadened.)
my $u = umon_create "--subsystem-match=block/disk";

## Prepare for the awful synchronization hack.  We need to make sure, below,
## that we've read all of the interesting events resulting from an `lvremove'
## call.  To do this, we wait for an event on a different device -- but we
## must avoid being fooled by spurious events on this device.  As an attempt
## to minimize the probability of this going wrong, acquire a pet device
## which nobody else is using.  The best idea seems to be a loopback device.
##
## `losetup --show' prints the name of the device it chose.  Read that, drain
## anything else it has to say, and check that it actually succeeded before
## believing the answer: if it failed, there may be no output at all.
open my $lopipe, "-|", "losetup", "--show", "--find", "/etc/motd"
  or fail "open (losetup attach): $!";
my $lo = $lopipe->getline;
{ local $/ = undef; <$lopipe>; }
$lopipe->close or fail "losetup attach failed (rc = $?)";
defined $lo or fail "losetup attach didn't report a device";
chomp $lo;
$lo =~ s:^/dev/::;
$lo ne "" or fail "losetup attach reported an empty device name";
cleanup { system "losetup", "--detach", "/dev/$lo" };

## Initial synchronization, to make sure stuff works.
umon_sync $u, $lo;
381
## Try to remove the snapshot.  Capture stdout and stderr, and relay them if
## nothing serious went wrong.  The only outcome which needs our attention
## is `lvremove' exiting with status 5; anything else, success included, is
## passed straight back to our caller.
burble "initial attempt to remove snapshot";
my ($rc, $out, $err) = capture "lvremove", "--force", "$VG/$LV";
if ($rc != 0x500) {
  print STDOUT $out;
  print STDERR $err;
  burble "lvremove didn't explode (rc = $rc): we're done here";

  ## Convert the wait status into an exit status.  If the child was killed
  ## by a signal, report 128 plus the signal number, taking care to leave
  ## out the core-dump flag (bit 7 of the wait status).
  if ($rc >> 8) { $rc >>= 8 }
  elsif ($rc) { $rc = 128 + ($rc & 127) }
  exit $rc;
}
burble "initial lvremove failed";
395
## OK, stuff went wrong.  First see if there was a udev cookie left over, and
## if so try to release it.  It's important to know that we've read all of
## the relevant uevents, so synchronize again.
##
## Only events about our snapshot which carry a DM_COOKIE are interesting.
## The cookie is recorded as its low 16 bits combined with the 0xd4d0000
## prefix, which is the form compared against `dmsetup udevcookies' below.
my @e = umon_sync $u, $lo;
my %c = ();
for my $e (@e) {
  next unless exists $e->{DM_COOKIE};
  next unless defined $e->{DEVPATH} && $e->{DEVPATH} eq $SYS;
  $c{($e->{DM_COOKIE} & 0xffff) | 0xd4d0000} = 1;
}
burble "cookies used: " . join ", ", map { sprintf "0x%x", $_ } keys %c;

## Find the used cookies which are still extant, and release them.  The
## first line of the `udevcookies' output is skipped (presumably a heading);
## the cookie is the first field of each remaining line.
open my $uc, "-|", "dmsetup", "udevcookies" or fail "open (cookies): $!";
$uc->getline;
my @leak = ();
while (my $l = $uc->getline) {
  my @f = split ' ', $l;
  push @leak, $f[0] if $c{fixint $f[0]};
}
close $uc or fail "udevcookies failed (rc = $?)";
for my $c (@leak) {
  burble "release leaked cookie $c";
  run "release cookie", "dmsetup", "udevreleasecookie", $c unless $NOACT;
}
420
## If we're very unlucky, the origin volume may still be suspended.  Resume
## it now, or the next attempt will get stuck.  (Resuming is idempotent, so
## we don't need to check whether it's already running.)  Finding the origin
## is annoying: search the device-mapper table for a device with a
## single-line `snapshot-origin' table referencing the same backing store as
## the snapshot.  There must be exactly one such device.
my $back = $DMTAB{dmname $SYS}[0][3];
burble "backend device $back";
my @origins = grep {
  my $tab = $DMTAB{$_};
  @$tab == 1 &&
    $tab->[0][2] eq "snapshot-origin" &&
    $tab->[0][3] eq $back;
} keys %DMTAB;
fail "snapshot appears to have multiple origins" if @origins > 1;
fail "couldn't find snapshot origin device" unless @origins;
my $orig = $origins[0];
burble "found origin volume $orig; resuming...";
unless ($NOACT) { run "resume origin $orig", "dmsetup", "resume", $orig; }
441
## See whether removing the snapshot again helps any.
burble "retry snapshot removal";
unless ($NOACT) { run "retry", "lvremove", "--force", "$VG/$LV"; }

## OK, we're on the way to recovery.  The origin device may now be not a
## snapshot-origin any more.  Refresh the device-mapper table and inspect it:
## if the backend device is still around but the origin has stopped being a
## snapshot-origin, then the backend is left over and wants removing.
dmtable_update;
if (-d "/sys/dev/block/$back") {
  my $backdm = dmname "/dev/block/$back";
  unless ($DMTAB{$orig}[0][2] eq "snapshot-origin") {
    burble "origin released but backend $backdm still exists: remove";
    $NOACT or run "remove backend $backdm", "dmsetup", "remove", $backdm;
  }
}

## All done.  There, that wasn't so bad, was it?
burble "completed successfully";
exit 0;
459 burble "completed successfully";
460 exit 0;
461
462 ###----- That's all, folks --------------------------------------------------