Strip redundant Emacs mode markers from Perl scripts.
[distorted-backup] / lvm-rmsnap.in
CommitLineData
99248ed2 1#! @PERL@
99248ed2
MW
2###
3### Remove an LVM snapshot, without falling foul of LVM bugs
4###
5### (c) 2011 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This program is free software; you can redistribute it and/or modify
11### it under the terms of the GNU General Public License as published by
12### the Free Software Foundation; either version 2 of the License, or
13### (at your option) any later version.
14###
15### This program is distributed in the hope that it will be useful,
16### but WITHOUT ANY WARRANTY; without even the implied warranty of
17### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18### GNU General Public License for more details.
19###
20### You should have received a copy of the GNU General Public License
21### along with this program; if not, write to the Free Software Foundation,
22### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24use Cwd qw(realpath);
25use Errno qw(:POSIX);
26use Fcntl qw(:mode);
27use File::stat;
28use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
29use IO::Handle;
30use Time::HiRes qw(time);
31
32our $VERSION = "@VERSION@";
33
34###--------------------------------------------------------------------------
35### Utilities.
36
37## Error handling and reporting.
## The program name used to prefix diagnostics: `$0' with any leading
## directory components removed.
our $QUIS = $0;
$QUIS =~ s:^.*/::;

## Debugging flag: when true, `burble' messages are printed.
our $DEBUG = 0;

sub whine ($) {
  ## whine MSG
  ##
  ## Write MSG to stderr as a single line, prefixed by the program name.

  my ($msg) = @_;
  print STDERR "$QUIS: $msg\n";
}

sub burble ($) {
  ## burble MSG
  ##
  ## Report MSG in the same way as `whine', but only if debugging is on.

  my ($msg) = @_;
  $DEBUG and whine $msg;
}

sub fail ($) {
  ## fail MSG
  ##
  ## Report MSG and exit.  The exit status is the numeric value of `$!' if
  ## that is nonzero; failing that, the exit code held in `$?'; failing that,
  ## 255.

  my ($msg) = @_;
  whine $msg;
  exit $! || ($? >> 8) || 255;
}
43
## Cleanups.  Call `cleanup BLOCK' to arrange to have BLOCK executed at the
## end of the program.  Cleanups run most-recently-registered first, either
## from the `END' block on an ordinary exit, or from the signal handler below
## if the program is interrupted by SIGINT or SIGTERM.
our @CLEANUP = ();

sub runcleanups {
  ## runcleanups
  ##
  ## Run all of the registered cleanups, in order.
  ##
  ## The value of `$?' is preserved.  Inside an `END' block, `$?' holds the
  ## status that the program is about to exit with, and a cleanup which runs
  ## a child process (e.g., with `system') would otherwise replace it -- so
  ## a failure could be reported to our caller as success.

  local $?;
  for my $f (@CLEANUP) { $f->(); }
}

END { runcleanups; }

$SIG{INT} = $SIG{TERM} = sub {
  ## Tidy up, and then die of the same signal with its default disposition
  ## restored, so that our parent sees how we really ended.
  my ($sig) = @_;
  runcleanups;
  $SIG{$sig} = 'DEFAULT';
  kill $sig => $$;
};

sub cleanup (&) {
  ## cleanup BLOCK
  ##
  ## Register BLOCK to be run at the end of the program.  New cleanups go on
  ## the front of the list, so they run before older ones.

  unshift @CLEANUP, $_[0];
}
56
sub fixint ($) {
  ## fixint STRING
  ##
  ## Convert STRING to a number.  A string with a leading `0' is handed to
  ## `oct', which copes with octal and with `0x' and `0b' prefixes; anything
  ## else is converted as a decimal number.

  my ($x) = @_;
  return oct $x if $x =~ /^0/;
  return $x + 0;
}
58
59###--------------------------------------------------------------------------
60### Device fiddling.
61
sub devsys ($) {
  ## devsys DEV
  ##
  ## Return a sysfs path for a device DEV.  The result is the canonical
  ## location of the device's entry under `/sys/dev', with the leading `/sys'
  ## stripped off (so it starts with a `/').  Fails if DEV can't be examined,
  ## isn't a block or character device node, or has no sysfs entry.

  my ($dev) = @_;
  my $st = stat($dev) or fail("stat ($dev): $!");
  my $mode = $st->mode;
  my $kind = S_ISBLK($mode) ? "block"
           : S_ISCHR($mode) ? "char"
           : fail("$dev is not a device");

  ## Pick the device number apart using the Linux `dev_t' layout: the major
  ## number lives in bits 8--19 and 44--63, and the minor number in bits 0--7
  ## and 20--43.  Simple 8-bit masks give wrong answers as soon as a minor
  ## number exceeds 255.
  my $rdev = $st->rdev;
  my $maj = (($rdev >> 8) & 0xfff) | (($rdev >> 32) & 0xfffff000);
  my $min = ($rdev & 0xff) | (($rdev >> 12) & 0xffffff00);

  ## Resolve the symlink under `/sys/dev' to find the real device directory.
  my $whole = realpath("/sys/dev/$kind/$maj:$min");
  defined $whole
    or fail("no sysfs entry for $dev ($kind device $maj:$min)");
  $whole =~ s:^/sys/:/:;
  return $whole;
}
77
## The device-mapper table, as last read by `dmtable_update'.  Maps a device
## name to a list of table rows; each row is a listref of the
## whitespace-separated fields following the name in `dmsetup table' output.
our %DMTAB = ();

sub dmtable_update () {
  ## dmtable_update
  ##
  ## Discard the current contents of %DMTAB and rebuild it from the output
  ## of `dmsetup table'.  Fails if `dmsetup' can't be run or reports an
  ## error.

  burble "re-read device-mapper table";
  %DMTAB = ();

  open my $pipe, "-|", "dmsetup", "table" or fail "open (dm table): $!";
  while (my $line = $pipe->getline) {

    ## The device name is everything up to the first run of colons and
    ## whitespace; the rest of the line is the table row itself.
    my ($name, $row) = split /[:\s]+/, $line, 2;
    my @fields = split ' ', $row;
    push @{$DMTAB{$name}}, \@fields;
  }
  close $pipe or fail "dmsetup table failed (rc = $?)";
}
94
sub dmname ($) {
  ## dmname SYSPATH
  ##
  ## Return the device-mapper node name for the sysfs path SYSPATH, as
  ## recorded in the `dm/name' file in the device's sysfs directory.  Fails
  ## if that file can't be opened.

  my ($sys) = @_;
  my $file = "/sys$sys/dm/name";
  open my $fh, "<", $file or fail "open ($sys/dm/name): $!";
  my $name = $fh->getline;
  chomp $name;
  close $fh;
  return $name;
}
106
107###--------------------------------------------------------------------------
108### I/O utilities.
109
sub sel ($;$$$) {
  ## sel TIMEOUT, [READS, WRITES, EXCEPTIONS]
  ##
  ## Wait for at most TIMEOUT seconds (indefinitely if TIMEOUT is `undef').
  ## Each of READS, WRITES and EXCEPTIONS is a listref containing FILE => SUB
  ## pairs: if the FILE is readable (writable, has an exceptional condition)
  ## then the SUB is invoked, with no arguments.
  ##
  ## Fails if select(2) reports an error, except that an interrupted wait
  ## simply returns without invoking anything, as if the timeout had expired.

  my ($t, $r, $w, $x) = @_;
  my ($vr, $vw, $vx);
  my (%r, %w, %x);

  ## Read the arguments and build a data structure: for each kind of event,
  ## a bit vector to give to select(2), and a map from file descriptors to
  ## the handlers to call.
  for my $i ([$r, \$vr, \%r], [$w, \$vw, \%w], [$x, \$vx, \%x]) {
    my ($list, $vec, $map) = @$i;
    next unless $list;
    my @pairs = @$list;
    while (@pairs) {
      my ($f, $g) = splice @pairs, 0, 2;
      my $fd = $f->fileno;
      $map->{$fd} = $g;
      vec($$vec, $fd, 1) = 1;
    }
  }

  ## Do the wait.  On error, `select' returns -1 (not `undef'), and leaves
  ## the bit vectors in no fit state to be examined.
  my $n = select $vr, $vw, $vx, $t;
  if ($n < 0) {
    return if $! == EINTR;
    fail("select: $!");
  }

  ## Sift through the results.
  for my $i ([$vr, \%r], [$vw, \%w], [$vx, \%x]) {
    my ($vec, $map) = @$i;
    for my $fd (keys %$map) {
      $map->{$fd}->() if vec $vec, $fd, 1;
    }
  }
}
144
sub doread ($;$) {
  ## doread FILE, [LEN]
  ##
  ## Read up to LEN bytes (4096 if LEN is omitted or undefined) from FILE.
  ## If the file has ended, return undef.  If reading would block, return an
  ## empty string.  Otherwise return the data which was read.  Any other
  ## read error is fatal.

  my ($f, $n) = @_;
  my $buf;
  my $got = sysread $f, $buf, $n // 4096;
  if (defined $got) { return $got ? $buf : undef; }
  return "" if $! == EAGAIN;
  fail "read: $!";
}
158
sub run ($$@) {
  ## run WHAT, PROG, ARGS...
  ##
  ## Run PROG, passing it ARGS, and wait for it to finish.  Fails, with a
  ## message mentioning WHAT and the wait status, unless PROG exits with
  ## status zero.

  my ($what, $prog, @args) = @_;
  my $rc = system($prog, @args);
  fail "$prog ($what) failed (rc = $?)" if $rc != 0;
}
167
sub capture ($@) {
  ## capture PROG, ARGS...
  ##
  ## Run PROG, passing it ARGS.  Returns exit status, stdout, and stderr, as
  ## strings.  The status is the raw wait status, as left in `$?'.

  my ($prog, @args) = @_;
  my ($out, $err) = ("", "");
  my ($outpipe_in, $outpipe_out, $errpipe_in, $errpipe_out);
  pipe($outpipe_in, $outpipe_out) or fail("pipe ($prog out): $!");
  pipe($errpipe_in, $errpipe_out) or fail("pipe ($prog err): $!");
  defined(my $kid = fork) or fail("fork ($prog): $!");

  if ($kid == 0) {
    ## Child process.  If anything goes wrong here, we leave via `fail', and
    ## hence through the `END' block: forget the cleanups we inherited, or
    ## we'd tear down resources which still belong to the parent.
    @CLEANUP = ();
    close($outpipe_in)
      and close($errpipe_in)
      and open(STDOUT, ">&", $outpipe_out)
      and open(STDERR, ">&", $errpipe_out)
      and exec($prog, @args)
      or fail("exec $prog: $!");
  }

  ## Parent process.  Close our copies of the write ends, so that we see
  ## end-of-file when the child has finished with them.
  close $outpipe_out;
  close $errpipe_out;

  ## Collect output from both pipes until each has reported end-of-file.  A
  ## pipe which has finished is closed and its variable cleared, so that it
  ## drops out of the next round.
  my @streams = ([\$outpipe_in, \$out], [\$errpipe_in, \$err]);
  for (;;) {
    my @r = ();
    for my $s (@streams) {
      my ($p, $acc) = @$s;
      next unless $$p;
      push @r, $$p => sub {
        my $buf = doread($$p);
        if (defined $buf) { $$acc .= $buf; }
        else { close $$p; $$p = undef; }
      };
    }
    last unless @r;
    sel(undef, \@r);
  }

  ## Reap the child.  On failure `waitpid' returns -1, which is true, so
  ## compare against the process-id we expect.
  waitpid($kid, 0) == $kid or fail("waitpid ($prog): $!");
  return $?, $out, $err;
}
207
208###--------------------------------------------------------------------------
209### Monitoring udev events.
210
sub umon_create (@) {
  ## umon_create ARGS...
  ##
  ## Create a udev monitor, with the given `udevadm monitor' arguments, and
  ## return an object.  We always select only kernel events.  We try to wait
  ## for the monitor to start up before returning.  Don't trust this: use
  ## `umon_sync' anyway.
  ##
  ## The object is a hashref: KID is the monitor's process-id, PIPE is a
  ## non-blocking handle on its output, and BUF holds output which has been
  ## read but not yet parsed.  Fails if the monitor can't be started, or
  ## doesn't finish its preamble within five seconds.

  my @args = @_;
  my $u = {};

  ## Start the monitor process.
  $u->{KID} = open($u->{PIPE}, "-|",
                   "stdbuf", "-o0",
                   "udevadm", "monitor", "--kernel", "--property", @args)
    or fail "open (umon): $!";
  cleanup { kill 9, $u->{KID} };

  ## `blocking' returns the previous setting, or undef on error; so only an
  ## undefined result means trouble.
  defined($u->{PIPE}->blocking(0)) or fail "set non-blocking (umon): $!";

  ## Wait for the end of the preamble, indicated by the first blank line.
  ## From observation with strace(1), this means that the monitor has
  ## successfully attached itself to its netlink socket and is ready to fetch
  ## events.  Anything after the blank line is event text, possibly many
  ## lines of it, and must be kept: hence the `/s' flag, so that `.' matches
  ## newlines.
  my $ok = 0;
  my $buf = "";
  my $now = time;
  my $end = $now + 5;
  until ($ok) {
    $now < $end or fail "umon timeout";
    sel
      $end - $now,
      [ $u->{PIPE} => sub {
          defined (my $b = doread $u->{PIPE}) or fail "read (umon): eof";
          $buf .= $b;
          if ($buf =~ /\n\n(.*)\z/s) { $ok = 1; $buf = $1; }
        }
      ];
    $now = time;
  }
  $u->{BUF} = $buf;

  ## Done.
  return $u;
}
255
sub umon_read ($) {
  ## umon_read UMON
  ##
  ## Read all of the events currently available from UMON, and return them
  ## as a list of hash references mapping properties to their values.
  ## Events are separated by blank lines; an unfinished event at the end of
  ## the input is kept in UMON's buffer until the rest of it arrives.  Lines
  ## which aren't of the form `KEY=VALUE' are ignored.  Fails if the monitor
  ## reports end-of-file.

  my ($u) = @_;
  my @events = ();

  while (1) {
    my $chunk = doread $u->{PIPE};
    defined $chunk or fail "read (umon): end of file";
    last if $chunk eq "";

    ## Split into records.  The final piece is whatever follows the last
    ## blank line (possibly nothing): save it for next time.
    my @recs = split /\n\n/, $u->{BUF} . $chunk, -1;
    $u->{BUF} = pop @recs;

    for my $rec (@recs) {
      my %props = ();
      for my $line (split /\n/, $rec) {
        $line =~ /^(\w+)=(.*)$/ or next;
        $props{$1} = $2;
      }
      push @events, \%props;
    }
  }

  return @events;
}
276
sub umon_sync ($$) {
  ## umon_sync UMON, DEV
  ##
  ## Wait for UMON to report an event about the device DEV (without its
  ## `/dev/' prefix).  An event on DEV is provoked immediately using
  ## `udevadm trigger', and again every two seconds for as long as nothing
  ## suitable turns up.  This is useful for synchronizing.  Returns the list
  ## of other events which were read along the way, before and after the
  ## interesting one.

  my ($u, $dev) = @_;
  my $now = time;
  my $retry = 0;
  my $done = 0;
  my @ev = ();
  burble "sync with udev";

  ## Handler for monitor output: file the events away until the one we're
  ## waiting for shows up.  That one is dropped; the rest of its batch is
  ## kept.
  my $reader = sub {
    my @batch = umon_read $u;
    for my $i (0 .. $#batch) {
      if ($batch[$i]{DEVNAME} eq $dev) {
        $done = 1;
        push @ev, @batch[$i + 1 .. $#batch];
        last;
      }
      push @ev, $batch[$i];
    }
  };

  while (!$done) {

    ## Too late.  Trigger a change event and try again.
    if ($now >= $retry) {
      $retry = $now + 2;
      run "trigger $dev", "udevadm", "trigger", "--sysname-match=$dev";
    }

    ## Now read events and see what happens.
    sel $retry - $now, [ $u->{PIPE} => $reader ];
    $now = time;
  }

  return @ev;
}
317
318###--------------------------------------------------------------------------
319### Main code.
320
321## Parse the command line.
our $USAGE = "usage: $QUIS VGNAME/LVNAME";

## Print the program name and version on stdout.
sub version { print "$QUIS, version $VERSION\n"; }

## Print the usage message and a summary of the options on stdout.
sub help {
  print <<EOF;
$USAGE

Options:
  -h, --help            Show this help text.
  -v, --version         Show the program version number.
  -d, --debug           Show debugging information.
  -n, --no-act          Don't take corrective actions.
EOF
}

## Parse the options, and check that exactly one argument, of the form
## VGNAME/LVNAME, remains.  The `--no-act' spelling is the one the help text
## advertises; `--noact' is kept for compatibility.
our $NOACT = 0;
GetOptions('help|h|?' => sub { version; help; exit; },
           'version|v' => sub { version; exit; },
           'debug|d' => \$DEBUG,
           'no-act|noact|n' => \$NOACT)
  and @ARGV == 1
  and $ARGV[0] =~ m:(.+)/(.+):
  or do { print STDERR $USAGE, "\n"; exit 1; };

## The captures from the match above are the volume group and logical volume
## names.
our ($VG, $LV) = ($1, $2);
345
## Before doing anything drastic, make sure that the volume really exists,
## that device-mapper knows about it, and that it's a snapshot.
dmtable_update;
our $SYS = devsys "/dev/$VG/$LV";
burble "sysfs name is $SYS";
my $t = $DMTAB{dmname $SYS};
$t or fail "/dev/$VG/$LV isn't a device-mapper device";
if ($DEBUG) {
  burble "found table...";
  for my $row (@$t) { burble("\t" . join(" ", @$row)); }
}
fail "/dev/$VG/$LV isn't a snapshot" unless $t->[0][2] eq "snapshot";

## Start a udev monitor.  Only disk-shaped block devices are of interest.
## (Should some other kind of device ever be used for synchronization, this
## filter will need to be widened to match.)
my $u = umon_create "--subsystem-match=block/disk";
363
## Prepare for the awful synchronization hack.  We need to make sure, below,
## that we've read all of the interesting events resulting from an `lvremove'
## call.  To do this, we wait for an event on a different device -- but we
## must avoid being fooled by spurious events on this device.  As an attempt
## to minimize the probability of this going wrong, acquire a pet device
## which nobody else is using.  The best idea seems to be a loopback device.
open my $lopipe, "-|", "losetup", "--show", "--find", "/etc/motd"
  or fail "open (losetup attach): $!";
my $lo = $lopipe->getline;
{ local $/ = undef; <$lopipe>; }

## Closing the pipe waits for `losetup'.  If it merely exited nonzero then
## `$!' is zero and the interesting information is in `$?'.
$lopipe->close
  or fail($! ? "wait (losetup attach): $!"
             : "losetup attach failed (rc = $?)");

## Without a device name there is nothing to synchronize against, and
## nothing to detach later.
defined $lo or fail "losetup attach didn't report a device name";
chomp $lo;
$lo =~ s:^/dev/::;
cleanup { system "losetup", "--detach", "/dev/$lo" };

## Initial synchronization, to make sure stuff works.
umon_sync $u, $lo;
380
## Try to remove the snapshot.  Capture stdout and stderr, and relay them if
## nothing serious went wrong.  The only outcome we try to recover from is
## `lvremove' exiting with status 5 (a wait status of 0x500); anything else
## is passed back to our caller.
burble "initial attempt to remove snapshot";
my ($rc, $out, $err) = capture "lvremove", "--force", "$VG/$LV";
if ($rc != 0x500) {
  print STDOUT $out;
  print STDERR $err;
  burble "lvremove didn't explode (rc = $rc): we're done here";

  ## Convert the wait status into an exit status: the exit code if there was
  ## one; otherwise 128 plus the signal number, as a shell would report it.
  ## The core-dump flag (bit 7) is masked off, since including it would push
  ## the result beyond the range of an exit status.
  if ($rc >> 8) { $rc >>= 8 }
  elsif ($rc & 127) { $rc = 128 + ($rc & 127) }
  exit $rc;
}
burble "initial lvremove failed";
394
## OK, stuff went wrong.  First see if there was a udev cookie left over, and
## if so try to release it.  It's important to know that we've read all of
## the relevant uevents, so synchronize again.
my @e = umon_sync $u, $lo;

## Collect the cookies mentioned in events about our device.  Only the low
## 16 bits of the event's value are kept; the constant 0xd4d0000 supplies
## the upper bits of the key used for comparison below.
## NOTE(review): presumably 0xd4d is the device-mapper cookie magic number
## -- confirm against the libdevmapper sources.
my %c = ();
for my $e (@e) {
  next unless $e->{DEVPATH} eq $SYS && exists $e->{DM_COOKIE};
  $c{($e->{DM_COOKIE} & 0xffff) | 0xd4d0000} = 1;
}
burble("cookies used: " . join(", ", map { sprintf "0x%x", $_ } keys %c));

## Find the used cookies which are still extant, and release them.  The
## first line of the listing is skipped, as are blank lines; the cookie is
## the first field of each remaining line.
open my $uc, "-|", "dmsetup", "udevcookies" or fail "open (cookies): $!";
$uc->getline;
my @leak = ();
while (my $l = $uc->getline) {
  my @f = split ' ', $l;
  next unless @f;
  push @leak, $f[0] if $c{fixint $f[0]};
}
close $uc or fail "udevcookies failed (rc = $?)";
for my $c (@leak) {
  burble "release leaked cookie $c";
  run "release cookie", "dmsetup", "udevreleasecookie", $c unless $NOACT;
}
419
## With bad luck, the origin volume has been left suspended, and the next
## removal attempt would hang; so resume it first.  (There's no need to
## check whether it's actually suspended: resuming a running device is
## harmless.)  The origin has to be hunted down: it's the device whose table
## consists of a single `snapshot-origin' row naming the same backing store
## as the snapshot's own table does.
my $back = $DMTAB{dmname $SYS}[0][3];
burble "backend device $back";
my @origins = grep {
  my $tab = $DMTAB{$_};
  @$tab == 1 &&
    $tab->[0][2] eq "snapshot-origin" &&
    $tab->[0][3] eq $back
} keys %DMTAB;
@origins > 1 and fail "snapshot appears to have multiple origins";
@origins or fail "couldn't find snapshot origin device";
my $orig = $origins[0];
burble "found origin volume $orig; resuming...";
unless ($NOACT) { run "resume origin $orig", "dmsetup", "resume", $orig; }

## Now have another go at removing the snapshot.
burble "retry snapshot removal";
unless ($NOACT) { run "retry", "lvremove", "--force", "$VG/$LV"; }
444
## Things should be recovering now.  The origin device may have stopped
## being a snapshot-origin, so read the device-mapper table afresh and look.
## If the origin has been released but the backing device is still there,
## then remove the backing device too.
dmtable_update;
if (-d "/sys/dev/block/$back") {
  my $backdm = dmname "/dev/block/$back";
  my $origkind = $DMTAB{$orig}[0][2];
  unless ($origkind eq "snapshot-origin") {
    burble "origin released but backend $backdm still exists: remove";
    if (!$NOACT) {
      run "remove backend $backdm", "dmsetup", "remove", $backdm;
    }
  }
}

## Finished.  That could have been a lot worse.
burble "completed successfully";
exit 0;
460
461###----- That's all, folks --------------------------------------------------