From 2e267ae8659832e97f23b78eeab6eaf793f60e19 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Mon, 10 Mar 2014 10:45:26 +0000 Subject: [PATCH] New ftpsync upstream. --- bin/ftpsync | 202 +++++++++++++++++++++++++++++++++++++++++++++++++-------- bin/runmirrors | 2 +- etc/common | 30 ++++++--- 3 files changed, 199 insertions(+), 35 deletions(-) diff --git a/bin/ftpsync b/bin/ftpsync index 1ae7b81..f85fd28 100755 --- a/bin/ftpsync +++ b/bin/ftpsync @@ -12,7 +12,7 @@ set -E # Based losely on a number of existing scripts, written by an # unknown number of different people over the years. # -# Copyright (C) 2008,2009,2010,2011 Joerg Jaspert +# Copyright (C) 2008-2012 Joerg Jaspert # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -36,7 +36,7 @@ BASEDIR=${BASEDIR:-"${HOME}"} # Script version. DO NOT CHANGE, *unless* you change the master copy maintained # by Joerg Jaspert and the Debian mirroradm group. # This is used to track which mirror is using which script version. -VERSION="80387" +VERSION="20130605" # Source our common functions . "${BASEDIR}/etc/common" @@ -268,10 +268,24 @@ LOGROTATE=${LOGROTATE:-14} LOCK=${LOCK:-"${TO}/Archive-Update-in-Progress-${MIRRORNAME}"} # timeout for the lockfile, in case we have bash older than v4 (and no /proc) LOCKTIMEOUT=${LOCKTIMEOUT:-3600} +# sleeping time when an AUIP file is found but is not ours +UIPSLEEP=${UIPSLEEP:-1200} +# retries whenever an upstream (or possibly stale) AUIP file is found +UIPRETRIES=${UIPRETRIES:-3} # Do we need another rsync run? UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" # Trace file for mirror stats and checks (make sure we get full hostname) TRACE=${TRACE:-"project/trace/${MIRRORNAME}"} +# The trace file can have different format/contents. Here you can select +# what it will be. 
+# Possible values are +# "full" - all information +# "terse" - basic, timestamp only (date -u) +# "touch" - just touch the file in existence +# "none" - no tracefile at all +# +# Default and required value for Debian mirrors is full. +EXTENDEDTRACE=${EXTENDEDTRACE:-"full"} # rsync program RSYNC=${RSYNC:-rsync} @@ -280,18 +294,40 @@ RSYNC=${RSYNC:-rsync} RSYNC_FILTER=${RSYNC_FILTER:-"--filter=protect_Archive-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Archive-Update-Required-${MIRRORNAME}"} # limit I/O bandwidth. Value is KBytes per second, unset or 0 is unlimited RSYNC_BW=${RSYNC_BW:-0} +RSYNC_PROTOCOL=$(rsync_protocol) + +# Set the delete method to --delete-delay if protocol version is 30 or +# greater (meaning rsync 3.0.0 or greater is used). Use --delete-after +# otherwise. +if [ 30 -le "${RSYNC_PROTOCOL:-0}" ]; then + RSYNC_DELETE_METHOD=delay +else + RSYNC_DELETE_METHOD=after +fi + # Default rsync options for *every* rsync call RSYNC_OPTIONS=${RSYNC_OPTIONS:-"-prltvHSB8192 --timeout 3600 --stats ${RSYNC_FILTER}"} # Options we only use in the first pass, where we do not want packages/sources to fly in yet and don't want to delete files -RSYNC_OPTIONS1=${RSYNC_OPTIONS1:-"--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*"} +RSYNC_OPTIONS1=${RSYNC_OPTIONS1:-"--exclude=Packages* --exclude=Sources* --exclude=Release* --exclude=InRelease --exclude=i18n/* --exclude=ls-lR*"} # Options for the second pass, where we do want everything, including deletion of old and now unused files -RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"} +RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-excluded"} # Which rsync share to use on our upstream mirror? RSYNC_PATH=${RSYNC_PATH:-"debian"} +# Extra rsync options as defined by the admin locally. Won't be set +# to any default by ftpsync.
Those will be added to EACH AND EVERY rsync call. +RSYNC_EXTRA=${RSYNC_EXTRA:-""} + # Now add the bwlimit option. As default is 0 we always add it, rsync interprets # 0 as unlimited, so this is safe. -RSYNC_OPTIONS="--bwlimit=${RSYNC_BW} ${RSYNC_OPTIONS}" +RSYNC_OPTIONS="${RSYNC_EXTRA} --bwlimit=${RSYNC_BW} ${RSYNC_OPTIONS}" + +# Finally, make sure RSYNC_OPTIONS2 has either --delete-after or --delete-delay +RSYNC_OPTION_REGEX="--delete-(after|delay)" +if ! [[ ${RSYNC_OPTIONS2} =~ ${RSYNC_OPTION_REGEX} ]]; then + RSYNC_OPTIONS2+=" --delete-${RSYNC_DELETE_METHOD}" +fi +unset RSYNC_OPTION_REGEX # We have no default host to sync from, but will error out if its unset RSYNC_HOST=${RSYNC_HOST:-""} @@ -316,7 +352,7 @@ SYNCSTAGE2=${SYNCSTAGE2:-"false"} SYNCALL=${SYNCALL:-"true"} # Do we have a mhop sync? SYNCMHOP=${SYNCMHOP:-"false"} -# Do we callback? +# Do we callback? (May get changed later) SYNCCALLBACK=${SYNCCALLBACK:-"false"} # If we call back we need some more options defined in the config file. CALLBACKUSER=${CALLBACKUSER:-"archvsync"} @@ -327,17 +363,17 @@ CALLBACKKEY=${CALLBACKKEY:-"none"} EXCLUDE=${EXCLUDE:-""} # The temp directory used by rsync --delay-updates is not -# world-readable remotely. Always exclude it to avoid errors.
+EXCLUDE="${EXCLUDE} --exclude=.~tmp~/" SOURCE_EXCLUDE=${SOURCE_EXCLUDE:-""} ARCH_EXCLUDE=${ARCH_EXCLUDE:-""} # Exclude architectures defined in $ARCH_EXCLUDE for ARCH in ${ARCH_EXCLUDE}; do - EXCLUDE="${EXCLUDE} --exclude binary-${ARCH}/ --exclude installer-${ARCH}/ --exclude Contents-${ARCH}.gz --exclude Contents-${ARCH}.bz2 --exclude Contents-${ARCH}.diff/ --exclude arch-${ARCH}.files --exclude arch-${ARCH}.list.gz --exclude *_${ARCH}.deb --exclude *_${ARCH}.udeb --exclude *_${ARCH}.changes" + EXCLUDE="${EXCLUDE} --exclude=binary-${ARCH}/ --exclude=installer-${ARCH}/ --exclude=Contents-${ARCH}.gz --exclude=Contents-udeb-${ARCH}.gz --exclude=Contents-${ARCH}.diff/ --exclude=arch-${ARCH}.files --exclude=arch-${ARCH}.list.gz --exclude=*_${ARCH}.deb --exclude=*_${ARCH}.udeb --exclude=*_${ARCH}.changes" if [ "${ARCH}" = "source" ]; then if [ -z ${SOURCE_EXCLUDE} ]; then - SOURCE_EXCLUDE=" --exclude source/ --exclude *.tar.gz --exclude *.diff.gz --exclude *.tar.bz2 --exclude *.tar.xz -exclude *.diff.bz2 --exclude *.dsc " + SOURCE_EXCLUDE=" --exclude=source/ --exclude=*.tar.gz --exclude=*.diff.gz --exclude=*.tar.bz2 --exclude=*.tar.xz --exclude=*.diff.bz2 --exclude=*.dsc " fi fi done @@ -379,11 +415,11 @@ if ! ( set -o noclobber; echo "$$" > "${LOCK}") 2> /dev/null; then if [ ${BASH_VERSINFO[0]} -gt 3 ] || [ -L /proc/self ]; then # We have a recent enough bash version, lets do it the easy way, # the lock will contain the right pid, thanks to $BASHPID - if ! $(kill -0 $(cat ${LOCK}) 2>/dev/null); then + if ! $(kill -0 $(< ${LOCK}) 2>/dev/null); then # Process does either not exist or is not owned by us. 
echo "$$" > "${LOCK}" else - echo "Unable to start rsync, lock file still exists, PID $(cat ${LOCK})" + echo "Unable to start rsync, lock file still exists, PID $(< ${LOCK})" exit 1 fi else @@ -445,6 +481,8 @@ fi export RSYNC_PASSWORD export RSYNC_PROXY +UPDATE_RETRIES=0 + while [ -e "${UPDATEREQUIRED}" ]; do log "Running mirrorsync, update is required, ${UPDATEREQUIRED} exists" @@ -487,17 +525,54 @@ while [ -e "${UPDATEREQUIRED}" ]; do # if we want stage2 *or* all if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then - log "Running stage2: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}" + upstream_uip=false + for aupfile in "${TO}/Archive-Update-in-Progress-"*; do + case "$aupfile" in + "${TO}/Archive-Update-in-Progress-*") + error "Lock file is missing, this should not happen" + ;; + "${LOCK}") + : + ;; + *) + if [ -f "$aupfile" ]; then + # Remove the file, it will be synced again if + # upstream is still not done + rm -f "$aupfile" + else + log "AUIP file '$aupfile' is not really a file, weird" + fi + upstream_uip=true + ;; + esac + done - set +e - # We are lucky, it worked. Now do step 2 and sync again, this time including - # the packages/releases files - ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} \ - ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >>"${LOGDIR}/rsync-${NAME}.log" 2>>"${LOGDIR}/rsync-${NAME}.error" - result=$? 
- set -e + if [ "xtruex" = "x${upstream_uip}x" ]; then + log "Upstream archive update in progress, skipping stage2" + if [ ${UPDATE_RETRIES} -lt ${UIPRETRIES} ]; then + log "Retrying update in ${UIPSLEEP}" + touch "${UPDATEREQUIRED}" + UPDATE_RETRIES=$(($UPDATE_RETRIES+1)) + sleep "${UIPSLEEP}" + result=0 + else + error "Update has been retried ${UPDATE_RETRIES} times, aborting" + log "Perhaps upstream is still updating or there's a stale AUIP file" + result=1 + fi + else + log "Running stage2: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}" + + set +e + # We are lucky, it worked. Now do step 2 and sync again, this time including + # the packages/releases files + ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} \ + ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >>"${LOGDIR}/rsync-${NAME}.log" 2>>"${LOGDIR}/rsync-${NAME}.error" + result=$? + set -e - log "Back from rsync with returncode ${result}" + log "Back from rsync with returncode ${result}" + fi else # Fake a good resultcode result=0 @@ -522,14 +597,46 @@ done # We only update our tracefile when we had a stage2 or an all sync. # Otherwise we would update it after stage1 already, which is wrong. + if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then - if [ -d "$(dirname "${TO}/${TRACE}")" ]; then - LC_ALL=POSIX LANG=POSIX date -u > "${TO}/${TRACE}" - echo "Used ftpsync version: ${VERSION}" >> "${TO}/${TRACE}" - echo "Running on host: $(hostname -f)" >> "${TO}/${TRACE}" - fi + case ${EXTENDEDTRACE} in + none) + log "No trace file wanted.
Not creating one" + ;; + touch) + log "Just touching the trace file" + touch "${TO}/${TRACE}" + ;; + terse|full) + log "Creating a ${EXTENDEDTRACE} trace file" + if [ -d "$(dirname "${TO}/${TRACE}")" ]; then + LC_ALL=POSIX LANG=POSIX date -u > "${TO}/${TRACE}.new" + echo "Used ftpsync version: ${VERSION}" >> "${TO}/${TRACE}.new" + echo "Running on host: $(hostname -f)" >> "${TO}/${TRACE}.new" + if [ "xfullx" = "x${EXTENDEDTRACE}x" ]; then + GLOBALARCHLIST="source amd64 armel armhf hurd-i386 i386 ia64 kfreebsd-amd64 kfreebsd-i386 mips mipsel powerpc s390 s390x sparc" + + AEXCLUDE="^${ARCH_EXCLUDE// /\$|^}$" + ARCHLIST="" + for ARCH in ${GLOBALARCHLIST}; do + if ! [[ ${ARCH} =~ ${AEXCLUDE} ]]; then + ARCHLIST="${ARCHLIST} ${ARCH}" + fi + done + out="GUESSED:{${ARCHLIST}}" + echo "Architectures: ${out}" >> "${TO}/${TRACE}.new" + echo "Upstream-mirror: ${RSYNC_HOST}" >> "${TO}/${TRACE}.new" + fi # full trace + mv "${TO}/${TRACE}.new" "${TO}/${TRACE}" + fi + ;; + *) + error "Unsupported EXTENDEDTRACE value configured in ${BASEDIR}/etc/${NAME}.conf, please fix" + ;; + esac fi + HOOK=( HOOKNR=4 HOOKSCR=${HOOK4} @@ -545,6 +652,49 @@ fi # Remove the Archive-Update-in-Progress file before we push our downstreams. rm -f "${LOCK}" +# Check if there is a newer version of ftpsync. If so inform the admin, but not +# more than once every third day. +if [ -r "${TO}/project/ftpsync/LATEST.VERSION" ]; then + LATEST=$(< "${TO}/project/ftpsync/LATEST.VERSION") + if ! 
[[ ${LATEST} =~ [0-9]+ ]]; then + LATEST=0 + fi + if [ ${LATEST} -gt ${VERSION} ]; then + if [ -n "${MAILTO}" ]; then + difference=0 + if [ -f "${LOGDIR}/ftpsync.newversion" ]; then + stamptime=$(< "${LOGDIR}/ftpsync.newversion") + unixtime=$(date +%s) + difference=$(( $unixtime - $stamptime )) + fi + if [ ${difference} -ge 259200 ]; then + # Only warn every third day + mail -e -s "[$(hostname -s)] Update for ftpsync available" ${MAILTO} < "${LOGDIR}/ftpsync.newversion" + fi + fi + else + # Remove a possible stampfile + rm -f "${LOGDIR}/ftpsync.newversion" + fi +fi + if [ x${HUB} = "xtrue" ]; then # Trigger slave mirrors if we had a push for stage2 or all, or if its mhop if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ] || [ "xtruex" = "x${SYNCMHOP}x" ]; then diff --git a/bin/runmirrors b/bin/runmirrors index 0758c4d..53f97ff 100755 --- a/bin/runmirrors +++ b/bin/runmirrors @@ -215,7 +215,7 @@ while read MTYPE MLNAME MHOSTNAME MUSER MSSHOPT; do elif [ ${MSSHOPT:0:1} -eq 1 ] || [ ${MSSHOPT:0:1} -eq 2 ]; then # We do seem to have oldstyle options here. MPROTO=${MSSHOPT:0:1} - MKEYFILE=${MSSHOPT:1} + MKEYFILE=${MSSHOPT:2} SSHOPT="" else error "I don't know what is configured for mirror ${MLNAME}" diff --git a/etc/common b/etc/common index 55beb9b..49ca038 100644 --- a/etc/common +++ b/etc/common @@ -92,12 +92,12 @@ signal () { if [ "xallx" = "x${PUSHTYPE}x" ]; then # Default normal "fire and forget" push. We background that, we do not care about the mirrors doings - echo "Sending normal push" >> "${LOGDIR}/${MIRROR}.log" + log "Sending normal push" >> "${LOGDIR}/${MIRROR}.log" PUSHARGS1="sync:all" ssh -f $SSH_OPTIONS "${HOSTNAME}" "${PUSHARGS} ${PUSHARGS1}" >>"${LOGDIR}/${MIRROR}.log" elif [ "xstagedx" = "x${PUSHTYPE}x" ] || [ "xmhopx" = "x${PUSHTYPE}x" ]; then # Want a staged push. Fine, lets do that. Not backgrounded. We care about the mirrors doings. 
- echo "Sending staged push" >> "${LOGDIR}/${MIRROR}.log" + log "Sending staged push" >> "${LOGDIR}/${MIRROR}.log" # Only send stage1 if we havent already send it. When called with stage2, we already did. if [ "xstage2x" != "x${PUSHKIND}x" ]; then @@ -129,10 +129,10 @@ signal () { # This is a little racy, especially if the other parts decide to do this # at the same time, but it wont hurt more than a mail too much, so I don't care much if [ ${tries} -ge ${PUSHDELAY} ]; then - echo "Failed to wait for all other mirrors. Failed ones are:" >> "${LOGDIR}/${MIRROR}.log" + log "Failed to wait for all other mirrors. Failed ones are:" >> "${LOGDIR}/${MIRROR}.log" for file in ${PUSHLOCKS}; do if [ ! -f ${file} ]; then - echo "${file}" >> "${LOGDIR}/${MIRROR}.log" + log "${file}" >> "${LOGDIR}/${MIRROR}.log" error "Missing Pushlockfile ${file} after waiting ${tries} second, continuing" fi done @@ -150,7 +150,7 @@ signal () { return else PUSHARGS2="sync:stage2" - echo "Now doing the second stage push" >> "${LOGDIR}/${MIRROR}.log" + log "Now doing the second stage push" >> "${LOGDIR}/${MIRROR}.log" ssh $SSH_OPTIONS "${HOSTNAME}" "${PUSHARGS} ${PUSHARGS2}" >>"${LOGDIR}/${MIRROR}.log" 2>&1 fi else @@ -181,7 +181,7 @@ log () { # to the address configured in MAILTO (if non-empty) error () { log "$@" - if [ -z "${MAILTO}" ]; then + if [ -n "${MAILTO}" ]; then echo "$@" | mail -e -s "[$PROGRAM@$(hostname -s)] ERROR [$$]" ${MAILTO} fi } @@ -195,10 +195,14 @@ hook () { if [ -n "${HOOKSCR}" ]; then log "Running hook $HOOKNR: ${HOOKSCR}" set +e - "${HOOKSCR}" + ${HOOKSCR} result=$? 
set -e - log "Back from hook $HOOKNR, got returncode ${result}" + if [ ${result} -ne 0 ] ; then + error "Back from hook $HOOKNR, got returncode ${result}" + else + log "Back from hook $HOOKNR, got returncode ${result}" + fi return $result else return 0 @@ -228,3 +232,13 @@ savelog() { done mv "${torotate}" "${torotate}.0" } + +# Return rsync version +rsync_protocol() { + RSYNC_VERSION="$(${RSYNC} --version)" + RSYNC_REGEX="(protocol[ ]+version[ ]+([0-9]+))" + if [[ ${RSYNC_VERSION} =~ ${RSYNC_REGEX} ]]; then + echo ${BASH_REMATCH[2]} + fi + unset RSYNC_VERSION RSYNC_REGEX +} -- 2.11.0