#! @BASH@ ### ### Backup script ### ### (c) 2012 Mark Wooding ### ###----- Licensing notice --------------------------------------------------- ### ### This file is part of the `rsync-backup' program. ### ### rsync-backup is free software; you can redistribute it and/or modify ### it under the terms of the GNU General Public License as published by ### the Free Software Foundation; either version 2 of the License, or ### (at your option) any later version. ### ### rsync-backup is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU General Public License for more details. ### ### You should have received a copy of the GNU General Public License ### along with rsync-backup; if not, write to the Free Software Foundation, ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. set -e thishost=$(hostname -s) quis=${0##*/} . @pkgdatadir@/lib.sh verbose=: dryrun=nil ###-------------------------------------------------------------------------- ### Utility functions. RSYNCOPTS="--verbose" do_rsync () { ## Run rsync(1) in an appropriate manner. Configuration should ovrride ## this or set $RSYNCOPTS if it wants to do something weirder. Arguments ## to this function are passed on to rsync. rsync \ --archive --hard-links --numeric-ids --del \ --sparse --compress \ --one-file-system \ --partial \ $RSYNCOPTS \ --filter="dir-merge .rsync-backup" \ "$@" } log () { case $dryrun in t) echo >&2 " *** $*" ;; nil) now=$(date +"%Y-%m-%d %H:%M:%S %z") echo >&9 "$now $*" ;; esac } maybe () { ## Run CMD, if this isn't a dry run. case $dryrun in t) echo >&2 " +++ $*" ;; nil) "$@" ;; esac } copy () { prefix=$1 ## Copy lines from stdin to stdout, adding PREFIX. 
while IFS= read -r line; do printf "%s %s\n" "$prefix" "$line" done } run () { stdinp=nil while :; do case $1 in -stdin) stdinp=t; shift ;; --) shift; break ;; *) break ;; esac done tag=$1 cmd=$2; shift 2 ## Run CMD, logging its output in a pleasing manner. case $dryrun in t) echo >&2 " *** RUN $tag" echo >&2 " +++ $cmd $*" rc=0 ;; nil) log "BEGIN $tag" rc=$( case $stdinp in nil) exec &- 4>&- 5>&- 9>&- echo $? >&5; ) | copy "|" >&4; } 2>&1 | copy "*" >&4; } 4>&1 | cat >&9; } 5>&1 ) case $rc in 0) log "END $tag" ;; *) log "FAIL $tag (rc = $rc)" ;; esac ;; esac return $rc } run_diff () { out=$1 old=$2 new=$3 ## Write a unified diff from OLD to NEW, to OUT. set +e; diff -u "$old" "$new" >"$out"; rc=$?; set -e case $rc in 1) cat "$out" ;; esac return $rc } localp () { h=$1 ## Answer whether H is a local host. case $h in "$thishost") return 0 ;; *) return 1 ;; esac } hostrun () { tag=$1 cmd=$2 ## Run CMD on the current host. If the host seems local then run the ## command through a local shell; otherwise run it through ssh(1). Either ## way it will be processed by a shell. if localp $host; then run "@$host: $tag" sh -c "$cmd" else run "@$host: $tag" ssh $userat$host "$cmd" fi } _hostrun () { h=$1 cmd=$2 ## Like hostrun, but without the complicated logging, and targetted at a ## specific host. if localp $h; then sh -c "$cmd" else ssh $h "$cmd" fi } hostpath () { path=$1 ## Output (to stdout) either PATH or HOST:PATH, choosing the former if the ## current host is local. if localp $host; then echo $path else echo $userat$host:$path fi } defhook () { hook=$1 ## Define a hook called HOOK. eval hk_$hook= } addhook () { hook=$1 cmd=$2 ## Add command CMD to the hook HOOK. eval old=\$hk_$hook; new="$old $cmd" eval hk_$hook=\$new } runhook () { hook=$1; shift 1 ## Invoke HOOK, passing it the remaining arguments. eval cmds=\$hk_$hook for cmd in $cmds; do if ! 
$cmd "$@"; then return $?; fi done } remove_old_logfiles () { base=$1 ## Remove old logfiles with names of the form BASE.DATE#N, so that there ## are at most $MAXLOG of them. ## Count up the logfiles. nlog=0 for i in "$base".*; do if [ ! -f "$i" ]; then continue; fi nlog=$(( nlog + 1 )) done ## If there are too many, go through and delete some early ones. if [ $dryrun = nil ] && [ $nlog -gt $MAXLOG ]; then n=$(( nlog - MAXLOG )) for i in "$base".*; do if [ ! -f "$i" ]; then continue; fi rm -f "$i" n=$(( n - 1 )) if [ $n -eq 0 ]; then break; fi done fi } ###-------------------------------------------------------------------------- ### Database operations. insert_index () { host=$1 fs=$2 date=$3 vol=$4 if [ -f "$INDEXDB" ]; then sqlite3 "$INDEXDB" <$mnt/.lock mount -oremount,ro $mnt" || return $? ## Done. hostpath $mnt } unsnap_ro () { fs=$1 mnt=$2 ## Check that the filesystem still has our lock marker. hostrun "unsnap-ro $mnt" " case \$(cat $mnt/.lock) in rsync-backup) ;; *) echo unlocked by someone else; exit 31 ;; esac mount -oremount,rw $mnt rm $mnt/.lock" || return $? } ## Snapshot using LVM. SNAPSIZE="-l10%ORIGIN" snap_lvm () { vg=$1 lv=$2 ## Make the snapshot. hostrun "snap-lvm $vg/$lv" " lvcreate --snapshot -n$lv.bkp $SNAPSIZE $vg/$lv mkdir -p $SNAPDIR/$lv mount -oro /dev/$vg/$lv.bkp $SNAPDIR/$lv" || return $? ## Done. hostpath $SNAPDIR/$lv } unsnap_lvm () { vg=$1 lv=$2 ## Remove the snapshot. Sometimes LVM doesn't notice that the snapshot is ## no longer in open immdiately, so try several times. Sometimes, more ## mysteriously, something is keeping the filesystem from being unmounted, ## so try that several times and report on things keeping the filesystem ## open. 
hostrun "unsnap-lvm $vg/$lv" " for i in 1 2 3 4; do echo \";;; BEGIN fuser -mv $SNAPDIR/$lv\" fuser -mv $SNAPDIR/$lv | sed 's/^/;;; /' echo \";;; END fuser -mv $SNAPDIR/$lv\" echo \";;; BEGIN lsof $SNAPDIR/$lv\" lsof $SNAPDIR/$lv | sed 's/^/;;; /' echo \";;; END lsof $SNAPDIR/$lv\" if umount $SNAPDIR/$lv; then break; fi sleep 2 done rc=1 for i in 1 2 3 4; do if lvremove -f $vg/$lv.bkp; then rc=0; break; fi sleep 2 done exit $rc" || return $? } ## Complicated snapshot using LVM, where the volume group and filesystem are ## owned by different machines, so they need to be synchronized during the ## snapshot. do_rfreezefs () { lvhost=$1 vg=$2 lv=$3 fshost=$4 fsdir=$5 ## Engage in the rfreezefs protocol with the filesystem host. This ## involves some hairy plumbing. We want to get exit statuses out of both ## halves. set +e ssh $fshost rfreezefs $fsdir | { set -e ## Read the codebook from the remote end. ready=nil while read line; do set -- $line case "$1" in PORT) port=$2 ;; TOKEN) eval tok_$2=$3 ;; READY) ready=t; break ;; *) echo >&2 "$quis: unexpected keyword $1 (rfreezefs to $rhost)" exit 1 ;; esac done case $ready in nil) echo >&2 "$quis: unexpected eof (rfreezefs to $rhost)" exit 1 ;; esac ## Connect to the filesystem host's TCP port and get it to freeze its ## filesystem. exec 3<>/dev/tcp/$fshost/$port echo $tok_FREEZE >&3 read tok <&3 case $tok in "$tok_FROZEN") ;; *) echo >&2 "$quis: unexpected token $tok (rfreezefs $fsdir on $fshost)" exit 1 ;; esac ## Get the volume host to create the snapshot. set +e _hostrun >&2 3>&- $userat$lvhost \ "lvcreate --snapshot -n$lv.bkp $SNAPSIZE $vg/$lv" snaprc=$? set -e ## The filesystem can thaw now. echo $tok_THAW >&3 read tok <&3 case $tok in "$tok_THAWED") ;; *) _hostrun >&2 3>&- $userat$lvhost "lvremove -f $vg/$lv.bkp" || : echo >&2 "$quis: unexpected token $tok (rfreezefs $fsdir on $fshost)" exit 1 ;; esac ## Done. exit $snaprc } ## Sift through the wreckage to find out what happened. 
rc_rfreezefs=${PIPESTATUS[0]} rc_snapshot=${PIPESTATUS[1]} set -e case $rc_rfreezefs:$rc_snapshot in 0:0) ;; 112:*) echo >&2 "$quis: EMERGENCY failed to thaw $fsdir on $fshost!" exit 112 ;; *) echo >&2 "$quis: failed to snapshot $vg/$lv ($fsdir on $fshost)" exit 1 ;; esac ## Mount the snapshot on the volume host. _hostrun >&2 $userat$lvhost " mkdir -p $SNAPDIR/$lv mount -oro /dev/$vg/$lv.bkp $SNAPDIR/$lv" } snap_rfreezefs () { rhost=$1 vg=$2 lv=$3 rfs=$4 set -e run "snap-rfreezefs $host:$vg/$lv $rhost:$rfs" \ do_rfreezefs $host $vg $lv $rhost $rfs || return $? hostpath $SNAPDIR/$lv } unsnap_rfreezefs () { ## Unshapping is the same as for plain LVM. rhost=$1 vg=$2 lv=$3 rfs=$4 unsnap_lvm $vg $lv } ###-------------------------------------------------------------------------- ### Expiry computations. expire () { ## Read dates on stdin; write to stdout `EXPIRE date' for dates which ## should be expired and `RETAIN date' for dates which should be retained. ## Get the current date and convert it into useful forms. now=$(date +%Y-%m-%d) parsedate $now now_jdn=$(julian $now) now_year=$year now_month=$month now_day=$day kept=: ## Work through each date in the input. while read date; do keep=nil ## Convert the date into a useful form. jdn=$(julian $date) parsedate $date ## Work through the policy list. if [ $jdn -le $now_jdn ]; then while read ival age; do ## Decide whether the policy entry applies to this date. 
apply=nil case $age in forever) apply=t ;; year) if [ $year -eq $now_year ] || ([ $year -eq $(( $now_year - 1 )) ] && [ $month -ge $now_month ]) then apply=t; fi ;; month) if ([ $month -eq $now_month ] && [ $year -eq $now_year ]) || ((([ $month -eq $(( $now_month - 1 )) ] && [ $year -eq $now_year ]) || ([ $month -eq 12 ] && [ $now_month -eq 1 ] && [ $year -eq $(( $now_year - 1 )) ])) && [ $day -ge $now_day ]) then apply=t; fi ;; week) if [ $jdn -ge $(( $now_jdn - 7 )) ]; then apply=t; fi ;; *) echo >&2 "$quis: unknown age symbol \`$age'" exit 1 ;; esac case $apply in nil) continue ;; esac ## Find the interval marker for this date. case $ival in daily) marker=$date ;; weekly) ydn=$(julian $year-01-01) wk=$(( ($jdn - $ydn)/7 + 1 )) marker=$year-w$wk ;; monthly) marker=$year-$month ;; annually | yearly) marker=$year ;; *) echo >&2 "$quis: unknown interval symbol \`$ival'" exit 1 ;; esac ## See if we've alredy retained something in this interval. case $kept in *:"$marker":*) ;; *) keep=t kept=$kept$marker: ;; esac done <new.fshash } local_fshash () { { echo "*** $host $fs $date"; echo fshash -c$STOREDIR/fshash.cache -H$HASH new/ } >$localmap } expire_backups () { { seen=: for i in *-*-*; do i=${i%%.*} case $i in *[!-0-9]*) continue ;; esac case $seen in *:"$i":*) continue ;; esac seen=$seen$i: echo $i done; } | expire | while read op date; do case $op,$dryrun in RETAIN,t) echo >&2 " --- keep $date" ;; EXPIRE,t) echo >&2 " --- delete $date" ;; RETAIN,nil) echo "keep $date" ;; EXPIRE,nil) echo "delete $date" $verbose -n " expire $date..." rm -rf $date $date.* delete_index $host $fs $date $verbose " done" ;; esac done } ## Backup hooks. defhook setup defhook precommit defhook postcommit backup_precommit_hook () { host=$1 fs=$2 date=$3 ## Compatibility: You can override this hook in the configuration file for ## special effects; but it's better to use `addhook precommit'. 
: } addhook precommit backup_precommit_hook backup_commit_hook () { host=$1 fs=$2 date=$3 ## Compatibility: You can override this hook in the configuration file for ## special effects; but it's better to use `addhook commit'. : } addhook commit backup_commit_hook do_backup () { date=$1 fs=$2 fsarg=$3 ## Back up FS on the current host. set -e attempt=0 fshash_diff=nil ## Run a hook beforehand. set +e; runhook setup $host $fs $date; rc=$?; set -e case $? in 0) ;; 99) log "BACKUP of $host:$fs SKIPPED by hook"; return 0 ;; *) log "BACKUP of $host:$fs FAILED (hook returns $?)"; return $? ;; esac ## Report the start of this attempt. log "START BACKUP of $host:$fs" ## Maybe we need to retry the backup. while :; do ## Rig checksum variables to mismatch unless they're set later. hrfs=REMOTE hlfs=LOCAL ## Create and mount the remote snapshot. case $dryrun in t) maybe snap_$snap $fs $fsarg snapmnt="" ;; nil) snapmnt=$(snap_$snap $snapargs $fs $fsarg) || return $? ;; esac $verbose " create snapshot" ## If we had a fshash-mismatch, then clear out the potentially stale ## entries, both locally and remotely. case $fshash_diff in nil) ;; *) $verbose " prune cache" run -stdin "local prune fshash" \ fshash -u -c$STOREDIR/fshash.cache -H$HASH new/ <$fshash_diff run -stdin "@$host: prune fshash" \ _hostrun $userat$host <$fshash_diff \ "fshash -u -c$fshashdir/$fs.bkp -H$HASH ${snapmnt#*:}" ;; esac ## Build the list of hardlink sources. linkdests="" for i in $host $like; do d=$STOREDIR/$i/$fs/last/ if [ -d $d ]; then linkdests="$linkdests --link-dest=$d"; fi done ## Copy files from the remote snapshot. maybe mkdir -p new/ case $dryrun in t) $verbose " running rsync" ;; nil) $verbose -n " running rsync..." ;; esac set +e run "RSYNC of $host:$fs (snapshot on $snapmnt)" do_rsync \ $linkdests \ $rsyncargs \ $snapmnt/ new/ rc_rsync=$? set -e case $dryrun in nil) $verbose " done" ;; esac ## Collect a map of the snapshot for verification purposes. 
set +e case $dryrun in t) $verbose " remote fshash" ;; nil) $verbose -n " remote fshash..." ;; esac run "@$host: fshash $fs" remote_fshash rc_fshash=$? set -e case $dryrun in nil) hrfs=$(hash_file "new.fshash") log "remote fshash $HASH checksum: $hrfs" $verbose " done" ;; t) hrfs=UNSET ;; esac ## Remove the snapshot. maybe unsnap_$snap $snapargs $fs $fsarg $verbose " remove snapshot" ## If we failed to copy, then give up. case $rc_rsync:$rc_fshash in 0:0) ;; 0:*) return $rc_fshash ;; *) return $rc_rsync ;; esac ## Get a matching map of the files received. maybe mkdir -m750 -p $STOREDIR/tmp/ localmap=$STOREDIR/tmp/fshash.$host.$fs.$date case $dryrun in t) $verbose " local fshash" ;; nil) $verbose -n " local fshash..." ;; esac run "local fshash $host:$fs" local_fshash || return $? case $dryrun in nil) hlfs=$(hash_file "$localmap") log "local fshash $HASH checksum: $hlfs" $verbose " done" ;; t) hlfs=UNSET ;; esac ## Compare the two maps. set +e fshash_diff=$STOREDIR/tmp/fshash-diff.$host.$fs.$date run "compare fshash maps for $host:$fs" \ run_diff $fshash_diff new.fshash $localmap rc_diff=$? set -e case $rc_diff in 0) break ;; 1) if [ $attempt -ge $retry ]; then return $rc; fi $verbose " fshash mismatch; retrying" attempt=$(( $attempt + 1 )) ;; *) return $rc_diff ;; esac done ## Double-check the checksums. if [ $hrfs != $hlfs ]; then cat >&2 <$log 1>&9; then echo >&2 echo >&2 "$quis: backup of $host:$fs FAILED!" bkprc=1 fi ## Clear away any old logfiles. remove_old_logfiles "$logdir/$host/$fs" } backup () { ## backup FS[:ARG] ... ## ## Back up the filesystems on the currently selected host using the ## currently selected snapshot type. ## Make sure that there's a store volume. We must do this here rather than ## in the main body of the script, since the configuration file needs a ## chance to override STOREDIR. if ! [ -r $STOREDIR/.rsync-backup-store ]; then echo >&2 "$quis: no backup volume mounted" exit 15 fi ## Read the volume name if we don't have one already. 
Again, this allows ## the configuration file to provide a volume name. case "${VOLUME+t}${VOLUME-nil}" in nil) VOLUME=$(cat $METADIR/volume) ;; esac ## Back up each requested file system in turn. for fs in "$@"; do ## Parse the argument. case $fs in *:*) fsarg=${fs#*:} fs=${fs%%:*} ;; *) fsarg="" ;; esac $verbose " filesystem $fs" ## Move to the store directory and set up somewhere to put this backup. cd $STOREDIR case $dryrun in nil) if [ ! -d $host ]; then mkdir -m755 $host chown root:root $host fi if [ ! -d $host/$fs ]; then mkdir -m750 $host/$fs chown root:backup $host/$fs fi ;; esac cd $host/$fs ## Find out if we've already copied this filesystem today. date=$(date +%Y-%m-%d) if [ $dryrun = nil ] && [ -d $date ]; then $verbose " already dumped" continue fi ## Do the backup of this filesystem. run_backup_cmd $fs $date do_backup $date $fs $fsarg done } ###-------------------------------------------------------------------------- ### Configuration functions. defhook start defhook end done_first_host_p=nil host () { host=$1 like= userat= case $done_first_host_p in nil) runhook start; done_first_host_p=t ;; esac case "${expire_policy+t},${default_policy+t}" in t,) default_policy=$expire_policy ;; esac unset expire_policy $verbose "host $host" } snaptype () { snap=$1; shift; snapargs="$*"; retry=1; } rsyncargs () { rsyncargs="$*"; } like () { like="$*"; } retry () { retry="$*"; } user () { userat="$*@"; } retain () { case $clear_policy in t) unset expire_policy; clear_policy=nil ;; esac expire_policy="${expire_policy+$expire_policy }$*" } ###-------------------------------------------------------------------------- ### Read the configuration and we're done. 
usage () {
  ## Print a one-line synopsis to stdout.  Callers redirect this to stderr
  ## when reporting a command-line error.  The option list mirrors the
  ## getopts specification in the parsing loop below.
  echo "usage: $quis [-hVnv] [-c CONF]"
}

version () {
  ## Print the program name and version to stdout.
  echo "$quis version $VERSION"
}

whine () {
  ## Write the arguments to file descriptor 8, which is made a duplicate of
  ## the script's original stdout below (`exec 8>&1').  This is the
  ## command stored in $verbose when `-v' is given.
  echo >&8 "$@"
}

## Parse the command line.
##
##   -h        show usage and exit successfully
##   -V        show version, then run `config' (defined outside this
##             section; presumably it reports build configuration) and exit
##   -v        make progress messages visible by routing them via `whine'
##   -c CONF   name the configuration file to read
##   -n        dry run
while getopts "hVvc:n" opt; do
  case "$opt" in
    h) usage; exit 0 ;;
    V) version; config; exit 0 ;;
    v) verbose=whine ;;
    c) conf=$OPTARG ;;
    n) dryrun=t ;;
    ## getopts has already printed its own diagnostic; add the synopsis so
    ## that the user can see what is accepted.
    *) usage >&2; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

## No positional arguments are accepted.
case $# in 0) ;; *) usage >&2; exit 1 ;; esac

## Keep a handle on the original stdout for `whine', then read the
## configuration file.  The configuration file is itself shell code and is
## sourced into this process.
exec 8>&1
. "$conf"

## Run the `end' hooks, passing the overall status, and report the outcome.
runhook end $bkprc
case "$bkprc" in
  0) $verbose "All backups successful" ;;
  *) $verbose "Backups FAILED" ;;
esac

###----- That's all, folks --------------------------------------------------

exit $bkprc