5c9a54cefff2570bd87ba48b8cebb010202e2390
[rsync-backup] / rsync-backup.in
1 #! @BASH@
2 ###
3 ### Backup script
4 ###
5 ### (c) 2012 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This file is part of the `rsync-backup' program.
11 ###
12 ### rsync-backup is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
16 ###
17 ### rsync-backup is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
21 ###
22 ### You should have received a copy of the GNU General Public License
23 ### along with rsync-backup; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 set -e
27
28 thishost=$(hostname -s)
29 quis=${0##*/}
30
31 VERSION=@VERSION@
32 mntbkpdir=@mntbkpdir@
33 logdir=@logdir@
34 fshashdir=@fshashdir@
35 conf=@sysconfdir@/rsync-backup.conf
36
37 verbose=:
38
39 ###--------------------------------------------------------------------------
40 ### Utility functions.
41
RSYNCOPTS="--verbose"

do_rsync () {
  ## Invoke rsync(1) with the standard backup options, then whatever is in
  ## $RSYNCOPTS, then the per-directory filter rule, and finally this
  ## function's own arguments.  Configuration should override this function
  ## or set $RSYNCOPTS if it wants to do something weirder.

  local -a base=(
    --archive --hard-links --numeric-ids --del
    --sparse --compress
    --one-file-system
    --partial
  )

  ## $RSYNCOPTS is left unquoted on purpose: it is a whitespace-separated
  ## list of options.
  rsync "${base[@]}" $RSYNCOPTS --filter="dir-merge .rsync-backup" "$@"
}
58
log () {
  ## Write one timestamped line, made from all of the arguments joined by
  ## spaces, to the log open on file descriptor 9.  The timestamp is left in
  ## the global `now'.

  now=$(date +"%Y-%m-%d %H:%M:%S %z")
  printf '%s\n' "$now $*" >&9
}
63
run () {
  tag=$1 cmd=$2; shift 2
  ## Run CMD with the remaining arguments, logging its output in a pleasing
  ## manner on file descriptor 9 (which must be open):
  ##
  ##   * `BEGIN TAG' before the command starts;
  ##   * each line of its stdout prefixed by `| ';
  ##   * each line of its stderr prefixed by `* ';
  ##   * `END TAG' if it exited zero, otherwise `FAIL TAG (rc = N)'.
  ##
  ## The command gets /dev/null as stdin and has descriptors 3, 4, 5 and 9
  ## closed.  Returns the command's exit status.  Note that `tag', `cmd' and
  ## `rc' are global variables, as they always have been.

  log "BEGIN $tag"

  ## The plumbing: descriptor 5 carries the exit status back out through the
  ## command substitution; descriptor 4 collects both prefixed streams and
  ## feeds them to the log.  The readers use `read -r' so that backslashes
  ## in the output are logged verbatim, and also emit a final line which
  ## lacks a trailing newline rather than dropping it.
  rc=$(
    { { { ( set +e
            "$cmd" "$@" 3>&- 4>&- 5>&- 9>&-
            echo $? >&5; ) |
          while IFS= read -r line || [ -n "$line" ]; do
            printf '%s\n' "| $line"
          done >&4; } 2>&1 |
        while IFS= read -r line || [ -n "$line" ]; do
          printf '%s\n' "* $line"
        done >&4; } 4>&1 |
      cat >&9; } 5>&1 </dev/null
  )

  ## If the status never arrived (say, the subshell was killed) then an
  ## empty `return' would report the status of the preceding `log', i.e.,
  ## success.  Treat a missing status as a failure instead.
  case $rc in "") rc=1 ;; esac

  case $rc in
    0) log "END $tag" ;;
    *) log "FAIL $tag (rc = $rc)" ;;
  esac
  return $rc
}
83
localp () {
  h=$1
  ## Answer (by exit status) whether H names this machine, i.e., whether it
  ## is exactly equal to $thishost.

  [[ $h == "$thishost" ]]
}
93
hostrun () {
  tag=$1 cmd=$2
  ## Execute CMD for the current $host under `run', logged with the tag
  ## `@HOST: TAG'.  A host which seems local gets the command through
  ## `sh -c'; any other host gets it through ssh(1).  Either way it will be
  ## processed by a shell.

  if ! localp $host; then
    run "@$host: $tag" ssh $host "$cmd"
  else
    run "@$host: $tag" sh -c "$cmd"
  fi
}
104
_hostrun () {
  h=$1 cmd=$2
  ## Like hostrun, but without the complicated logging, and targeted at the
  ## specific host H rather than the current $host.

  if ! localp $h; then
    ssh $h "$cmd"
  else
    sh -c "$cmd"
  fi
}
114
hostpath () {
  path=$1
  ## Output (to stdout) either PATH or HOST:PATH, choosing the bare PATH if
  ## the current $host is local.

  local where=
  if ! localp $host; then where=$host:; fi
  echo $where$path
}
124
125 ###--------------------------------------------------------------------------
126 ### Snapshot handling.
127
128 ## Snapshot protocol. Each snapshot type has a pair of functions snap_TYPE
129 ## and unsnap_TYPE. Each is given the current snapshot arguments and the
130 ## filesystem name to back up. The snap_TYPE function should create and
131 ## mount the snapshot and output an rsync(1) path to where the filesystem can
132 ## be copied; the unsnap_TYPE function should unmount and tear down the
133 ## snapshot.
134
135 ## Fake snapshot by not doing anything. Use only if you have no choice.
snap_live () {
  ## Nothing to set up: just report where the live filesystem (second
  ## argument) can be copied from.
  hostpath "$2"
}

unsnap_live () {
  ## Nothing to tear down.
  return 0
}
138
139 ## Fake snapshot by remounting a live filesystem read-only. Useful if the
140 ## underlying storage isn't in LVM.
141
snap_ro () {
  fs=$1 mnt=$2
  ## Remount MNT read-only on the current host and output the rsync(1) path
  ## to copy it from.  Returns hostrun's status if the remount fails.

  ## Place a marker in the filesystem so we know why it was made readonly.
  ## (Also this serves to ensure that the filesystem was writable before.)
  local script="
    echo rsync-backup >$mnt/.lock
    mount -oremount,ro $mnt"

  if hostrun "snap-ro $mnt" "$script"; then
    ## Done.
    hostpath $mnt
  else
    return $?
  fi
}
154
unsnap_ro () {
  fs=$1 mnt=$2
  ## Undo snap_ro: make MNT writable again on the current host and remove
  ## the marker file.  Returns hostrun's status.

  ## Check that the filesystem still has our lock marker; the remote script
  ## exits 31 without remounting if it does not.
  local script="
    case \$(cat $mnt/.lock) in
      rsync-backup) ;;
      *) echo unlocked by someone else; exit 31 ;;
    esac
    mount -oremount,rw $mnt
    rm $mnt/.lock"

  hostrun "unsnap-ro $mnt" "$script" || return $?
}
167
168 ## Snapshot using LVM.
169
SNAPSIZE="-l10%ORIGIN"
SNAPDIR=@mntbkpdir@/snap

snap_lvm () {
  vg=$1 lv=$2
  ## Create the LVM snapshot `LV.bkp' of VG/LV on the current host, mount it
  ## read-only on $SNAPDIR/LV, and output the rsync(1) path to copy it from.
  ## Returns hostrun's status if any of that fails.

  ## Make the snapshot.
  local script="
    lvcreate --snapshot -n$lv.bkp $SNAPSIZE $vg/$lv
    mkdir -p $SNAPDIR/$lv
    mount -oro /dev/$vg/$lv.bkp $SNAPDIR/$lv"

  if hostrun "snap-lvm $vg/$lv" "$script"; then
    ## Done.
    hostpath $SNAPDIR/$lv
  else
    return $?
  fi
}
185
unsnap_lvm () {
  vg=$1 lv=$2
  ## Unmount $SNAPDIR/LV on the current host and remove the snapshot
  ## `VG/LV.bkp'.  Returns hostrun's status, which is nonzero if the
  ## snapshot could not be removed.

  ## Remove the snapshot.  Sometimes LVM doesn't notice that the snapshot is
  ## no longer in use immediately, so try several times.
  ##
  ## The `$rc' in the script must be escaped: it belongs to the shell which
  ## runs the script.  Left bare, this shell would substitute its own
  ## unrelated `rc' (left over from the last `run', or empty) while building
  ## the string, and the outcome of the retry loop would be ignored.
  hostrun "unsnap-lvm $vg/$lv" "
    umount $SNAPDIR/$lv
    rc=1
    for i in 1 2 3 4; do
      if lvremove -f $vg/$lv.bkp; then rc=0; break; fi
      sleep 2
    done
    exit \$rc" || return $?
}
200
201 ## Complicated snapshot using LVM, where the volume group and filesystem are
202 ## owned by different machines, so they need to be synchronized during the
203 ## snapshot.
204
do_rfreezefs () {
  lvhost=$1 vg=$2 lv=$3 fshost=$4 fsdir=$5
  ## Snapshot the volume VG/LV, which lives on LVHOST, while the filesystem
  ## FSDIR which it backs is frozen on FSHOST; then mount the snapshot
  ## read-only on $SNAPDIR/LV on LVHOST.
  ##
  ## This function reports failure by calling `exit', so it must be run in
  ## a subshell (e.g., via `run').  It exits 112 if the rfreezefs program
  ## exited with status 112, which is reported as a failure to thaw the
  ## filesystem.

  ## Engage in the rfreezefs protocol with the filesystem host.  This
  ## involves some hairy plumbing.  We want to get exit statuses out of both
  ## halves.
  set +e
  ssh $fshost rfreezefs $fsdir | {
    set -e

    ## Read the codebook from the remote end.
    ##
    ## NOTE(review): the `eval' below interpolates words sent by the remote
    ## rfreezefs, so this trusts FSHOST not to send shell metacharacters.
    ready=nil
    while read -r line; do
      set -- $line
      case "$1" in
        PORT) port=$2 ;;
        TOKEN) eval tok_$2=$3 ;;
        READY) ready=t; break ;;
        *)
          echo >&2 "$quis: unexpected keyword $1 (rfreezefs to $fshost)"
          exit 1
          ;;
      esac
    done
    case $ready in
      nil)
        echo >&2 "$quis: unexpected eof (rfreezefs to $fshost)"
        exit 1
        ;;
    esac

    ## Connect to the filesystem host's TCP port and get it to freeze its
    ## filesystem.
    exec 3<>/dev/tcp/$fshost/$port
    echo $tok_FREEZE >&3
    read -r tok <&3
    case $tok in
      "$tok_FROZEN") ;;
      *)
        echo >&2 "$quis: unexpected token $tok (rfreezefs $fsdir on $fshost)"
        exit 1
        ;;
    esac

    ## Get the volume host to create the snapshot.  Don't let a failure
    ## here stop us: the filesystem must be thawed regardless.
    set +e
    _hostrun >&2 3>&- $lvhost \
      "lvcreate --snapshot -n$lv.bkp $SNAPSIZE $vg/$lv"
    snaprc=$?
    set -e

    ## The filesystem can thaw now.
    echo $tok_THAW >&3
    read -r tok <&3
    case $tok in
      "$tok_THAWED") ;;
      *)
        _hostrun >&2 3>&- $lvhost "lvremove -f $vg/$lv.bkp" || :
        echo >&2 "$quis: unexpected token $tok (rfreezefs $fsdir on $fshost)"
        exit 1
        ;;
    esac

    ## Done.
    exit $snaprc
  }

  ## Sift through the wreckage to find out what happened.  Both statuses
  ## must be collected in a single command, because PIPESTATUS is
  ## overwritten by the next one.
  rc_rfreezefs=${PIPESTATUS[0]} rc_snapshot=${PIPESTATUS[1]}
  set -e
  case $rc_rfreezefs:$rc_snapshot in
    0:0)
      ;;
    112:*)
      echo >&2 "$quis: EMERGENCY failed to thaw $fsdir on $fshost!"
      exit 112
      ;;
    *)
      echo >&2 "$quis: failed to snapshot $vg/$lv ($fsdir on $fshost)"
      exit 1
      ;;
  esac

  ## Mount the snapshot on the volume host.
  _hostrun >&2 $lvhost "
    mkdir -p $SNAPDIR/$lv
    mount -oro /dev/$vg/$lv.bkp $SNAPDIR/$lv"
}
293
snap_rfreezefs () {
  rhost=$1 vg=$2 lv=$3 rfs=$4
  ## Snapshot VG/LV on the current $host while the filesystem RFS is frozen
  ## on RHOST (the work is done by do_rfreezefs, under `run'), and output
  ## the rsync(1) path to copy the snapshot from.

  set -e
  if run "snap-rfreezefs $host:$vg/$lv $rhost:$rfs" \
       do_rfreezefs $host $vg $lv $rhost $rfs
  then
    hostpath $SNAPDIR/$lv
  else
    return $?
  fi
}
302
unsnap_rfreezefs () {
  rhost=$1 vg=$2 lv=$3 rfs=$4
  ## Unsnapping is the same as for plain LVM: only the volume group and
  ## volume name are needed.

  unsnap_lvm $vg $lv
}
309
310 ###--------------------------------------------------------------------------
311 ### Expiry computations.
312
parsedate () {
  date=$1
  ## Split an ISO8601 DATE (YYYY-MM-DD) at its hyphens, and leave the pieces
  ## in the globals `year', `month' and `day'.

  rest=${date#*-}
  year=${date%%-*}
  month=${rest%%-*}
  day=${rest#*-}

  ## Drop one leading zero from each piece: a number such as `08' would
  ## otherwise be taken as (invalid) octal in arithmetic.
  case $year in 0*) year=${year#0} ;; esac
  case $month in 0*) month=${month#0} ;; esac
  case $day in 0*) day=${day#0} ;; esac
}
323
julian () {
  date=$1
  ## Print the Julian Day Number of the ISO8601 DATE.
  ##
  ## This converts a (proleptic) Gregorian calendar date using the formula
  ## from http://en.wikipedia.org/wiki/Julian_day#Calculation.  The epoch is
  ## 4713BC-01-01 (proleptic) Julian, or 4714BC-11-24 proleptic Gregorian.

  local mdays leaps

  parsedate $date

  ## a = 1 for January and February, otherwise 0.
  a=$(( (14 - $month)/12 ))

  ## Year offset relative to 4799BC-03-01.  Starting the year in March puts
  ## the leap day at the very end of the year; the offset keeps y positive
  ## and is a multiple of 400, the Gregorian cycle length.
  y=$(( $year + 4800 - $a ))

  ## Month number within that shifted year, counting from zero.
  m=$(( $month + 12*$a - 3 ))

  ## Days in the first m months of the shifted year: (153 m + 2)/5 is a
  ## surprising but correct trick.
  mdays=$(( (153*$m + 2)/5 ))

  ## Leap days accumulated over y years under the Gregorian rules.
  leaps=$(( $y/4 - $y/100 + $y/400 ))

  ## The magic offset 32045 is what the machinery above yields for the
  ## proper JDN epoch (year = -4713, month = 11, day = 24).
  jdn=$(( $day + $mdays + 365*$y + $leaps - 32045 ))

  echo $jdn
}
359
expire () {
  ## Read dates on stdin; write to stdout `EXPIRE date' for dates which
  ## should be expired and `RETAIN date' for dates which should be retained.
  ##
  ## The policy is taken from $expire_policy, which holds one `INTERVAL AGE'
  ## pair per line.  For each policy line whose AGE window contains a date,
  ## the date is assigned a marker naming its INTERVAL bucket; the date is
  ## retained if it is the first one read which claims that marker.  So the
  ## order of the input decides which date survives in each bucket.  Dates
  ## later than today are never checked against the policy, and so expire.
  ##
  ## An unknown AGE or INTERVAL word is reported on stderr and the shell
  ## exits with status 1.

  ## Get the current date and convert it into useful forms.
  now=$(date +%Y-%m-%d)
  parsedate $now
  now_jdn=$(julian $now) now_year=$year now_month=$month now_day=$day
  ## Colon-delimited list of the markers claimed so far.
  kept=:

  ## Work through each date in the input.
  while read date; do
    keep=nil

    ## Convert the date into a useful form.
    jdn=$(julian $date)
    parsedate $date

    ## Work through the policy list.
    if [ $jdn -le $now_jdn ]; then
      while read ival age; do

        ## Decide whether the policy entry applies to this date.
        apply=nil
        case $age in
          forever)
            apply=t
            ;;
          year)
            ## This calendar year; or last year, from the current month
            ## number onwards.
            if [ $year -eq $now_year ] ||
               ([ $year -eq $(( $now_year - 1 )) ] &&
                [ $month -ge $now_month ])
            then apply=t; fi
            ;;
          month)
            ## This calendar month; or the previous one (December of last
            ## year if it is now January), from today's day number onwards.
            if ([ $month -eq $now_month ] && [ $year -eq $now_year ]) ||
               ((([ $month -eq $(( $now_month - 1 )) ] &&
                  [ $year -eq $now_year ]) ||
                 ([ $month -eq 12 ] && [ $now_month -eq 1 ] &&
                  [ $year -eq $(( $now_year - 1 )) ])) &&
                [ $day -ge $now_day ])
            then apply=t; fi
            ;;
          week)
            ## No more than seven days before today.
            if [ $jdn -ge $(( $now_jdn - 7 )) ]; then apply=t; fi
            ;;
          *)
            echo >&2 "$quis: unknown age symbol \`$age'"
            exit 1
            ;;
        esac
        ## Not applicable: move on to the next policy line.
        case $apply in nil) continue ;; esac

        ## Find the interval marker for this date.
        case $ival in
          daily)
            marker=$date
            ;;
          weekly)
            ## Weeks are seven-day blocks counted from 1 January of the
            ## date's own year, numbered from 1.
            ydn=$(julian $year-01-01)
            wk=$(( ($jdn - $ydn)/7 + 1 ))
            marker=$year-w$wk
            ;;
          monthly)
            marker=$year-$month
            ;;
          annually | yearly)
            marker=$year
            ;;
          *)
            echo >&2 "$quis: unknown interval symbol \`$ival'"
            exit 1
            ;;
        esac

        ## See if we've already retained something in this interval.  If
        ## not, this date claims the marker and is kept.
        case $kept in
          *:"$marker":*) ;;
          *) keep=t kept=$kept$marker: ;;
        esac

      done <<EOF
$expire_policy
EOF
    fi

    ## Report the verdict for this date.
    case $keep in
      t) echo RETAIN $date ;;
      *) echo EXPIRE $date ;;
    esac

  done
}
453
454 ###--------------------------------------------------------------------------
455 ### Actually taking backups of filesystems.
456
457 STOREDIR=@mntbkpdir@/store
458 MAXLOG=14
459 HASH=sha256
460
461 bkprc=0
462
remote_fshash () {
  ## Run fshash on the current $host over the mounted snapshot (the path
  ## part of $snapmnt), and write the resulting map, preceded by a header
  ## line and a blank line, to `new.fshash' in the current directory.  The
  ## file list fed to fshash comes from rsync, using the same filter rule
  ## as the backup itself.

  local script="
    umask 077
    mkdir -p $fshashdir
    cd ${snapmnt#*:}
    echo \"*** $host $fs $date\"; echo
    rsync -rx --filter='dir-merge .rsync-backup' ./ |
      fshash -c$fshashdir/$fs.bkp -a -H$HASH -frsync
  "

  _hostrun $host "$script" >new.fshash
}
473
local_fshash () {
  ## Write a map of the freshly received copy in `new/' to the file named
  ## by $localmap: a header line, a blank line, then fshash's output.

  {
    printf '%s\n\n' "*** $host $fs $date"
    fshash -c$STOREDIR/fshash.cache -H$HASH new/
  } >$localmap
}
479
expire_backups () {
  ## Apply the expiry policy to the backups in the current directory.
  ##
  ## Candidate dates are the names matching `*-*-*', cut at the first `.',
  ## which consist only of digits and hyphens; each is offered once to
  ## `expire'.  For every verdict a `keep DATE' or `delete DATE' line is
  ## written to stdout, and an expired DATE is removed along with its
  ## `DATE.*' companions.

  local seen=: name

  for name in *-*-*; do
    name=${name%%.*}
    case $name in *[!-0-9]*) continue ;; esac
    case $seen in *:"$name":*) continue ;; esac
    seen=$seen$name:
    echo $name
  done |
  expire |
  while read verdict stamp; do
    if [ "$verdict" = RETAIN ]; then
      echo "keep $stamp"
    elif [ "$verdict" = EXPIRE ]; then
      echo "delete $stamp"
      $verbose -n " expire $stamp..."
      rm -rf $stamp $stamp.*
      $verbose " done"
    fi
  done
}
504
backup_precommit_hook () {
  host=$1 fs=$2 date=$3
  ## Called by do_backup with HOST, FS and DATE just before the new backup
  ## is renamed into place.  The default does nothing; override this hook
  ## in the configuration file for special effects.

  return 0
}
511
backup_commit_hook () {
  host=$1 fs=$2 date=$3
  ## Called by do_backup with HOST, FS and DATE just after the new backup
  ## and its fshash map have been renamed into place.  The default does
  ## nothing; override this hook in the configuration file for special
  ## effects.

  return 0
}
518
do_backup () {
  date=$1 fs=$2 fsarg=$3
  ## Back up FS on the current host into the current directory, under the
  ## name DATE.  FSARG is handed on to the snapshot functions.  Log output
  ## goes to file descriptor 9, which the caller must have opened.
  ##
  ## Returns zero only if every step worked.  The caller runs this function
  ## as the condition of an `if', and in that context the shell ignores
  ## `set -e' throughout the function body.  So the steps after the copy
  ## are checked explicitly: otherwise a failed unsnap, hook or rename
  ## would go unnoticed and the backup would be logged as successful.

  set -e

  ## Report the start of this attempt.
  log "START BACKUP of $host:$fs"

  ## Create and mount the remote snapshot.
  snapmnt=$(snap_$snap $snapargs $fs $fsarg) || return $?
  $verbose " create snapshot"

  ## Build the list of hardlink sources: the latest backup of this
  ## filesystem for this host and for each host named by `like'.
  linkdests=""
  for i in $host $like; do
    d=$STOREDIR/$i/$fs/last/
    if [ -d $d ]; then linkdests="$linkdests --link-dest=$d"; fi
  done

  ## Copy files from the remote snapshot.  From here until the snapshot is
  ## removed, failures are only recorded, so that the snapshot never gets
  ## left behind.
  mkdir -p new/
  $verbose -n " running rsync..."
  set +e
  run "RSYNC of $host:$fs (snapshot on $snapmnt)" do_rsync \
    $linkdests \
    $rsyncargs \
    $snapmnt/ new/
  rc_rsync=$?
  set -e
  $verbose " done"

  ## Collect a map of the snapshot for verification purposes.
  set +e
  $verbose -n " remote fshash..."
  run "@$host: fshash $fs" remote_fshash
  rc_fshash=$?
  set -e
  $verbose " done"

  ## Remove the snapshot.
  unsnap_$snap $snapargs $fs $fsarg || return $?
  $verbose " remove snapshot"

  ## If we failed to copy, then give up.
  case $rc_rsync:$rc_fshash in
    0:0) ;;
    0:*) return $rc_fshash ;;
    *) return $rc_rsync ;;
  esac

  ## Get a matching map of the files received.
  mkdir -m750 -p $STOREDIR/tmp || return $?
  localmap=$STOREDIR/tmp/fshash.$host.$fs.$date
  $verbose -n " local fshash..."
  run "local fshash $host:$fs" local_fshash || return $?
  $verbose " done"

  ## Compare the two maps.
  run "compare fshash maps for $host:$fs" \
    diff -u new.fshash $localmap || return $?
  rm -f $localmap
  $verbose " fshash match"

  ## Commit this backup.  The `last' link is made in a scratch directory
  ## and then moved into place, replacing any previous link.
  backup_precommit_hook $host $fs $date || return $?
  mv new $date || return $?
  mv new.fshash $date.fshash || return $?
  backup_commit_hook $host $fs $date || return $?
  mkdir hack || return $?
  ln -s $date hack/last || return $?
  mv hack/last . || return $?
  rmdir hack || return $?
  $verbose " commit"

  ## Expire old backups, if an expiry policy has been set.
  case "${expire_policy+t}" in
    t) run "expiry for $host:$fs" expire_backups || return $? ;;
  esac

  ## Report success.
  log "SUCCESSFUL BACKUP of $host:$fs"
}
602
backup () {
  ## backup FS[:ARG] ...
  ##
  ## Back up the filesystems on the currently selected host using the
  ## currently selected snapshot type.  Each filesystem is stored in
  ## $STOREDIR/HOST/FS/DATE and logged to $logdir/HOST/FS.DATE#SEQ.  A
  ## filesystem which fails is reported on stderr and sets `bkprc' to 1;
  ## the remaining filesystems are still attempted.  Note that this leaves
  ## the current directory somewhere inside $STOREDIR.

  for fs in "$@"; do

    ## Parse the argument: anything after the first colon is the ARG.
    case $fs in
      *:*) fsarg=${fs#*:} fs=${fs%%:*} ;;
      *) fsarg="" ;;
    esac
    $verbose " filesystem $fs"

    ## Move to the store directory and set up somewhere to put this backup.
    cd $STOREDIR
    if [ ! -d $host ]; then
      mkdir -m755 $host
      chown root:root $host
    fi
    if [ ! -d $host/$fs ]; then
      mkdir -m750 $host/$fs
      chown root:backup $host/$fs
    fi
    cd $host/$fs

    ## Find out if we've already copied this filesystem today.
    date=$(date +%Y-%m-%d)
    if [ -d $date ]; then
      $verbose " already dumped"
      continue
    fi

    ## Find a name for the log file.  In unusual circumstances, we may have
    ## deleted old logs from today, so just checking for an unused sequence
    ## number is insufficient.  Instead, check all of the logfiles for today,
    ## and use a sequence number that's larger than any of them.
    seq=1
    for i in "$logdir/$host/$fs.$date#"*; do
      tail=${i##*#}
      ## Skip anything whose suffix isn't a decimal number without a leading
      ## zero; this includes the unexpanded pattern if nothing matched.
      case "$tail" in [!1-9]* | *[!0-9]*) continue ;; esac
      if [ -f "$i" -a $tail -ge $seq ]; then seq=$(( tail + 1 )); fi
    done
    log="$logdir/$host/$fs.$date#$seq"

    ## Do the backup of this filesystem.  The log file is opened on
    ## descriptor 9, and stdout is sent there too.
    mkdir -p $logdir/$host
    if ! do_backup $date $fs $fsarg 9>$log 1>&9; then
      echo >&2
      echo >&2 "$quis: backup of $host:$fs FAILED!"
      bkprc=1
    fi

    ## Count up the logfiles.
    nlog=0
    for i in "$logdir/$host/$fs".*; do
      if [ ! -f "$i" ]; then continue; fi
      nlog=$(( nlog + 1 ))
    done

    ## If there are too many, go through and delete some early ones.  The
    ## files are visited in the order in which the shell expands the
    ## pattern, and the first N of them are removed.
    if [ $nlog -gt $MAXLOG ]; then
      n=$(( nlog - MAXLOG ))
      for i in "$logdir/$host/$fs".*; do
        if [ ! -f "$i" ]; then continue; fi
        rm -f "$i"
        n=$(( n - 1 ))
        if [ $n -eq 0 ]; then break; fi
      done
    fi
  done
}
676
677 ###--------------------------------------------------------------------------
678 ### Configuration functions.
679
host () {
  ## Select the host for subsequent directives, forgetting any `like' list.
  host=$1
  like=
  $verbose "host $host"
}

snaptype () {
  ## Select the snapshot type; any further arguments become the snapshot
  ## arguments, joined into a single string.
  snap=$1
  shift
  snapargs="$*"
}

rsyncargs () {
  ## Set the extra arguments given to rsync for subsequent backups.
  rsyncargs="$*"
}

like () {
  ## Set the list of other hosts whose latest backups may serve as hardlink
  ## sources.
  like="$*"
}
684
retain () {
  ## Add a line, made from the arguments joined by spaces, to the expiry
  ## policy in $expire_policy.  Lines are separated by newlines; the first
  ## call is what brings the variable into existence.

  if [ "${expire_policy+set}" ]; then
    expire_policy="$expire_policy"$'\n'"$*"
  else
    expire_policy="$*"
  fi
}
689
690 ###--------------------------------------------------------------------------
691 ### Read the configuration and we're done.
692
usage () {
  ## Print a one-line usage summary to stdout.
  printf '%s\n' "usage: $quis [-v] [-c CONF]"
}
696
version () {
  ## Print the program name and version to stdout.
  printf '%s\n' "$quis version $VERSION"
}
700
config () {
  ## Print a blank line and then the configured paths, one `name = value'
  ## pair per line, to stdout.
  printf '\n'
  printf '%s\n' \
    "conf = $conf" \
    "mntbkpdir = $mntbkpdir" \
    "fshashdir = $fshashdir" \
    "logdir = $logdir"
}
710
whine () {
  ## Print a progress message on descriptor 8.  The arguments go to `echo'
  ## as they are, so a leading `-n' suppresses the newline.
  echo "$@" >&8
}
712
## Parse the command line.
while getopts "hVvc:" opt; do
  case "$opt" in
    h)
      usage
      exit 0
      ;;
    V)
      version
      config
      exit 0
      ;;
    v)
      verbose=whine
      ;;
    c)
      conf=$OPTARG
      ;;
    *)
      exit 1
      ;;
  esac
done
shift $(( OPTIND - 1 ))

## There are no positional arguments.
if [ $# -ne 0 ]; then
  usage >&2
  exit 1
fi

## Keep a copy of the original stdout on descriptor 8 for `whine', since
## stdout is redirected to the log file while a backup runs.
exec 8>&1

## The configuration file is a shell fragment; it does all of the work.
. "$conf"

###----- That's all, folks --------------------------------------------------

exit $bkprc