From 5675acdab338acf325a7a1c4cf50537fcd62fda3 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Fri, 19 Oct 2012 09:46:31 +0100 Subject: [PATCH] rsync-backup.in, rsync-backup.8: Retry backups which fail fshash check. I'm getting annoyed with vampire:root failing in the morning and then succeeding after a manual retry. So get the script to retry automatically. --- rsync-backup.8 | 19 +++++++++- rsync-backup.in | 115 +++++++++++++++++++++++++++++++++----------------------- 2 files changed, 87 insertions(+), 47 deletions(-) diff --git a/rsync-backup.8 b/rsync-backup.8 index a265352..b36153a 100644 --- a/rsync-backup.8 +++ b/rsync-backup.8 @@ -194,11 +194,28 @@ Expiry considers each existing dump against the policy lines in order: the last applicable line determines the dump's fate \(en so you should probably write the lines in decreasing order of duration. .TP +.BI "retry " count +The +.B live +snapshot type (see below) doesn't prevent a filesystem from being +modified while it's being backed up. If this happens, the +.B fshash +pass will detect the difference and fail. If the filesystem in question +is relatively quiescent, then maybe retrying the backup will result in a +successful consistent copy. Following this command, a backup which +results in an +.B fshash +mismatch will be retried up to +.I count +times before being declared a failure. +.TP .BI "snap " type " " \fR[\fIargs\fR...] Use the snapshot .I type for subsequent backups. Some snapshot types require additional -arguments, which may be supplied here. +arguments, which may be supplied here. This command clears the +.B retry +counter. .SS Configuration variables The following shell variables may be overridden by the configuration file. diff --git a/rsync-backup.in b/rsync-backup.in index 705696d..ade7231 100644 --- a/rsync-backup.in +++ b/rsync-backup.in @@ -521,62 +521,84 @@ do_backup () { ## Back up FS on the current host. set -e + attempt=0 ## Report the start of this attempt. 
log "START BACKUP of $host:$fs" - ## Create and mount the remote snapshot. - snapmnt=$(snap_$snap $snapargs $fs $fsarg) || return $? - $verbose " create snapshot" + ## Maybe we need to retry the backup. + while :; do - ## Build the list of hardlink sources. - linkdests="" - for i in $host $like; do - d=$STOREDIR/$i/$fs/last/ - if [ -d $d ]; then linkdests="$linkdests --link-dest=$d"; fi - done + ## Create and mount the remote snapshot. + snapmnt=$(snap_$snap $snapargs $fs $fsarg) || return $? + $verbose " create snapshot" - ## Copy files from the remote snapshot. - mkdir -p new/ - $verbose -n " running rsync..." - set +e - run "RSYNC of $host:$fs (snapshot on $snapmnt)" do_rsync \ - $linkdests \ - $rsyncargs \ - $snapmnt/ new/ - rc_rsync=$? - set -e - $verbose " done" + ## Build the list of hardlink sources. + linkdests="" + for i in $host $like; do + d=$STOREDIR/$i/$fs/last/ + if [ -d $d ]; then linkdests="$linkdests --link-dest=$d"; fi + done - ## Collect a map of the snapshot for verification purposes. - set +e - $verbose -n " remote fshash..." - run "@$host: fshash $fs" remote_fshash - rc_fshash=$? - set -e - $verbose " done" + ## Copy files from the remote snapshot. + mkdir -p new/ + $verbose -n " running rsync..." + set +e + run "RSYNC of $host:$fs (snapshot on $snapmnt)" do_rsync \ + $linkdests \ + $rsyncargs \ + $snapmnt/ new/ + rc_rsync=$? + set -e + $verbose " done" - ## Remove the snapshot. - unsnap_$snap $snapargs $fs $fsarg - $verbose " remove snapshot" + ## Collect a map of the snapshot for verification purposes. + set +e + $verbose -n " remote fshash..." + run "@$host: fshash $fs" remote_fshash + rc_fshash=$? + set -e + $verbose " done" - ## If we failed to copy, then give up. - case $rc_rsync:$rc_fshash in - 0:0) ;; - 0:*) return $rc_fshash ;; - *) return $rc_rsync ;; - esac + ## Remove the snapshot. + unsnap_$snap $snapargs $fs $fsarg + $verbose " remove snapshot" - ## Get a matching map of the files received. 
- mkdir -m750 -p $STOREDIR/tmp - localmap=$STOREDIR/tmp/fshash.$host.$fs.$date - $verbose -n " local fshash..." - run "local fshash $host:$fs" local_fshash || return $? - $verbose " done" + ## If we failed to copy, then give up. + case $rc_rsync:$rc_fshash in + 0:0) ;; + 0:*) return $rc_fshash ;; + *) return $rc_rsync ;; + esac + + ## Get a matching map of the files received. + mkdir -m750 -p $STOREDIR/tmp + localmap=$STOREDIR/tmp/fshash.$host.$fs.$date + $verbose -n " local fshash..." + run "local fshash $host:$fs" local_fshash || return $? + $verbose " done" + + ## Compare the two maps. + set +e + run "compare fshash maps for $host:$fs" diff -u new.fshash $localmap + rc_diff=$? + set -e + case $rc_diff in + 0) + break + ;; + 1) + if [ $attempt -ge $retry ]; then return $rc_diff; fi + $verbose " fshash mismatch; retrying" + attempt=$(( $attempt + 1 )) + ;; + *) + return $rc_diff + ;; + esac + done - ## Compare the two maps. - run "compare fshash maps for $host:$fs" \ - diff -u new.fshash $localmap || return $? + ## Glorious success. rm -f $localmap $verbose " fshash match" @@ -687,9 +709,10 @@ backup () { ### Configuration functions. host () { host=$1; like=; $verbose "host $host"; } -snaptype () { snap=$1; shift; snapargs="$*"; } +snaptype () { snap=$1; shift; snapargs="$*"; retry=0; } rsyncargs () { rsyncargs="$*"; } like () { like="$*"; } +retry () { retry="$*"; } retain () { expire_policy="${expire_policy+$expire_policy -- 2.11.0