From db2c5b8bde79ab1d6bea9799a24e09c985e95452 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Wed, 8 Jun 2011 21:56:38 +0100 Subject: [PATCH] Mirror admin stuff, initial commit. Includes the Debian `ftpsync' mirror script suite, machinery for general rsync(1) mirrors, and a skeleton configuration for Lee McLaughlin's mirror(1) script. By default the mirror does nothing useful whatever, and requires local configuration; this should be maintained on a different branch. --- .gitignore | 2 + .userv/rc | 8 + bin/dircombine | 62 ++++ bin/ftpsync | 582 +++++++++++++++++++++++++++++++++++ bin/pushpdo | 112 +++++++ bin/run-mirrors | 43 +++ bin/runmirrors | 286 +++++++++++++++++ bin/typicalsync | 168 ++++++++++ bin/udh | 13 + bin/websync | 304 ++++++++++++++++++ doc/ftpsync/README | 257 ++++++++++++++++ doc/ftpsync/ftpsync.conf.sample | 148 +++++++++ doc/ftpsync/logrotate.conf | 11 + doc/ftpsync/pushpdo.conf.sample | 40 +++ doc/ftpsync/pushpdo.mirror.sample | 21 ++ doc/ftpsync/runmirrors.conf.sample | 53 ++++ doc/ftpsync/runmirrors.mirror.sample | 72 +++++ doc/ftpsync/websync.conf.sample | 121 ++++++++ etc/common | 230 ++++++++++++++ etc/ftpsync-debian-security.conf | 148 +++++++++ etc/ftpsync-debian.conf | 184 +++++++++++ etc/mirror.packages | 27 ++ lib/functions.sh | 27 ++ 23 files changed, 2919 insertions(+) create mode 100644 .gitignore create mode 100644 .userv/rc create mode 100755 bin/dircombine create mode 100755 bin/ftpsync create mode 100755 bin/pushpdo create mode 100755 bin/run-mirrors create mode 100755 bin/runmirrors create mode 100755 bin/typicalsync create mode 100755 bin/udh create mode 100755 bin/websync create mode 100644 doc/ftpsync/README create mode 100644 doc/ftpsync/ftpsync.conf.sample create mode 100644 doc/ftpsync/logrotate.conf create mode 100644 doc/ftpsync/pushpdo.conf.sample create mode 100644 doc/ftpsync/pushpdo.mirror.sample create mode 100644 doc/ftpsync/runmirrors.conf.sample create mode 100644 doc/ftpsync/runmirrors.mirror.sample create 
mode 100644 doc/ftpsync/websync.conf.sample create mode 100644 etc/common create mode 100644 etc/ftpsync-debian-security.conf create mode 100644 etc/ftpsync-debian.conf create mode 100644 etc/mirror.packages create mode 100644 lib/functions.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e932c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +log +var diff --git a/.userv/rc b/.userv/rc new file mode 100644 index 0000000..b86f0c0 --- /dev/null +++ b/.userv/rc @@ -0,0 +1,8 @@ +### -*-conf-*- + +if ( glob service run + & glob calling-group ftpadmin + ) + no-suppress-args + execute bin/run-mirrors +fi diff --git a/bin/dircombine b/bin/dircombine new file mode 100755 index 0000000..e029c39 --- /dev/null +++ b/bin/dircombine @@ -0,0 +1,62 @@ +#!/usr/bin/perl +# Uses symlinks to merge the files contained in a set of vcs +# checkouts to into a single directory. Keeps track of when files are +# removed from the merged directories and removes the symlinks. +# +# Only merges files that match the specified pattern. +# +# Note that the directories given to merge should be paths that will work +# for symlink targets from the destination directory (so either full paths, +# or they should be right inside the destination directory). +# +# Note that other files in the destination directory will be left as-is. +# +# Copyright 2006 by Joey Hess, licensed under the GPL. + +if (! @ARGV) { + die "usage: dircombine include-pattern dest dir1 [dir2 ...]\n"; +} + +my $pattern=shift; +my $dest=shift; + +foreach my $dir (@ARGV) { + my %known; + + # Link in each thing from the dir. + opendir(DIR, $dir) || die "opendir: $!"; + while ($_=readdir(DIR)) { + next if $_ eq '.' || $_ eq '..' || $_ eq 'known' || $_ eq '.svn' || $_ eq '.git' || $_ eq '.gitignore' || $_ eq '_darcs'; + next unless /$pattern/; + + $known{$_}=1; + + if (! -l "$dest/$_" && -e "$dest/$_") { + print STDERR "$_ in $dir is also in $dest\n"; + } + elsif (! 
-l "$dest/$_") { + system("ln", "-svf", "$dir/$_", $dest); + } + } + closedir(DIR); + + # Remove anything that was previously linked in but is not in the + # dir anymore. + if (-e "$dir/known") { + open(KNOWN, "$dir/known") || die "open $dir/known: $!"; + while (<KNOWN>) { # one name per line, as written by the save-state loop below + chomp; + if (! $known{$_}) { + system("rm", "-vf", "$dest/$_"); + } + } + close KNOWN; + } + + # Save state for next time. + open(KNOWN, ">$dir/known") || die "write $dir/known: $!"; + foreach my $file (sort keys %known) { + print KNOWN "$file\n"; + } + close KNOWN; +} diff --git a/bin/ftpsync b/bin/ftpsync new file mode 100755 index 0000000..484789d --- /dev/null +++ b/bin/ftpsync @@ -0,0 +1,582 @@ +#! /bin/bash +# No, we can not deal with sh alone. + +set -e +set -u +# ERR traps should be inherited from functions too. (And command +# substitutions and subshells and whatnot, but for us the function is +# the important part here) +set -E + +# ftpsync script for Debian +# Based losely on a number of existing scripts, written by an +# unknown number of different people over the years. +# +# Copyright (C) 2008,2009,2010,2011 Joerg Jaspert +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +# In case the admin somehow wants to have this script located someplace else, +# he can set BASEDIR, and we will take that. If it is unset we take ${HOME} +# How the admin sets this isn't our place to deal with. 
One could use a wrapper +# for that. Or pam_env. Or whatever fits in the local setup. :) +BASEDIR=${BASEDIR:-"${HOME}"} + +# Script version. DO NOT CHANGE, *unless* you change the master copy maintained +# by Joerg Jaspert and the Debian mirroradm group. +# This is used to track which mirror is using which script version. +VERSION="80387" + +# Source our common functions +. "${BASEDIR}/etc/common" + +######################################################################## +######################################################################## +## functions ## +######################################################################## +######################################################################## +# We want to be able to get told what kind of sync we should do. This +# might be anything, from the archive to sync, the stage to do, etc. A +# list of currently understood and valid options is below. Multiple +# options are seperated by space. All the words have to have the word +# sync: in front or nothing will get used! +# +# Option Behaviour +# stage1 Only do stage1 sync +# stage2 Only do stage2 sync +# all Do a complete sync +# mhop Do a mhop sync, usually additionally to stage1 +# archive:foo Sync archive foo (if config for foo is available) +# callback Call back when done (needs proper ssh setup for this to +# work). It will always use the "command" callback:$HOSTNAME +# where $HOSTNAME is the one defined below/in config and +# will happen before slave mirrors are triggered. +# +# So to get us to sync all of the archive behind bpo and call back when +# we are done, a trigger command of +# "ssh $USER@$HOST sync:all sync:archive:bpo sync:callback" will do the +# trick. 
+check_commandline() { + while [ $# -gt 0 ]; do + case "$1" in + sync:stage1) + SYNCSTAGE1="true" + SYNCALL="false" + ;; + sync:stage2) + SYNCSTAGE2="true" + SYNCALL="false" + ;; + sync:callback) + SYNCCALLBACK="true" + ;; + sync:archive:*) + ARCHIVE=${1##sync:archive:} + # We do not like / or . in the remotely supplied archive name. + ARCHIVE=${ARCHIVE//\/} + ARCHIVE=${ARCHIVE//.} + ;; + sync:all) + SYNCALL="true" + ;; + sync:mhop) + SYNCMHOP="true" + ;; + *) + echo "Unknown option ${1} ignored" + ;; + esac + shift # Check next set of parameters. + done +} + +# All the stuff we want to do when we exit, no matter where +cleanup() { + trap - ERR TERM HUP INT QUIT EXIT + # all done. Mail the log, exit. + log "Mirrorsync done"; + + # Lets get a statistical value + SPEED="unknown" + if [ -f "${LOGDIR}/rsync-${NAME}.log" ]; then + SPEED=$( + SPEEDLINE=$(egrep '[0-9.]+ bytes/sec' "${LOGDIR}/rsync-${NAME}.log") + set "nothing" ${SPEEDLINE} + echo $8 + ) + if [ -n "${SPEED}" ]; then + SPEED=${SPEED%%.*} + SPEED=$(( $SPEED / 1024 )) + fi + fi + log "Rsync transfer speed: ${SPEED} KB/s" + + if [ -n "${MAILTO}" ]; then + # In case rsync had something on stderr + if [ -s "${LOGDIR}/rsync-${NAME}.error" ]; then + mail -e -s "[${PROGRAM}@$(hostname -s)] ($$) rsync ERROR on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} < "${LOGDIR}/rsync-${NAME}.error" + fi + if [ "x${ERRORSONLY}x" = "xfalsex" ]; then + # And the normal log + MAILFILES="${LOG}" + if [ "x${FULLLOGS}x" = "xtruex" ]; then + # Someone wants full logs including rsync + MAILFILES="${MAILFILES} ${LOGDIR}/rsync-${NAME}.log" + fi + cat ${MAILFILES} | mail -e -s "[${PROGRAM}@$(hostname -s)] archive sync finished on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} + fi + fi + + savelog "${LOGDIR}/rsync-${NAME}.log" + savelog "${LOGDIR}/rsync-${NAME}.error" + savelog "$LOG" > /dev/null + + rm -f "${LOCK}" +} + +# Check rsyncs return value +check_rsync() { + ret=$1 + msg=$2 + + # 24 - vanished source files. 
Ignored, that should be the target of $UPDATEREQUIRED + # and us re-running. If it's not, uplink is broken anyways. + case "${ret}" in + 0) return 0;; + 24) return 0;; + 23) return 2;; + 30) return 2;; + *) + error "ERROR: ${msg}" + return 1 + ;; + esac +} + +######################################################################## +######################################################################## + + +# As what are we called? +NAME="$(basename $0)" +# The original command line arguments need to be saved! +if [ $# -gt 0 ]; then + ORIGINAL_COMMAND=$* +else + ORIGINAL_COMMAND="" +fi + +SSH_ORIGINAL_COMMAND=${SSH_ORIGINAL_COMMAND:-""} +# Now, check if we got told about stuff via ssh +if [ -n "${SSH_ORIGINAL_COMMAND}" ]; then + # We deliberately add "nothing" and ignore it right again, to avoid + # people from outside putting some set options in the first place, + # making us parse them... + set "nothing" "${SSH_ORIGINAL_COMMAND}" + shift + # Yes, unqouted $* here. Or the function will only see it as one + # parameter, which doesnt help the case in it. + check_commandline $* +fi + +# Now, we can locally override all the above variables by just putting +# them into the .ssh/authorized_keys file forced command. +if [ -n "${ORIGINAL_COMMAND}" ]; then + set ${ORIGINAL_COMMAND} + check_commandline $* +fi + +# If we have been told to do stuff for a different archive than default, +# set the name accordingly. +ARCHIVE=${ARCHIVE:-""} +if [ -n "${ARCHIVE}" ]; then + NAME="${NAME}-${ARCHIVE}" +fi + +# Now source the config for the archive we run on. +# (Yes, people can also overwrite the options above in the config file +# if they want to) +if [ -f "${BASEDIR}/etc/${NAME}.conf" ]; then + . "${BASEDIR}/etc/${NAME}.conf" +else + echo "Nono, you can't tell us about random archives. Bad boy!" + exit 1 +fi + +######################################################################## +# Config options go here. Feel free to overwrite them in the config # +# file if you need to. 
# +# On debian.org machines the defaults should be ok. # +# # +# The following extra variables can be defined in the config file: # +# # +# ARCH_EXCLUDE # +# can be used to exclude a complete architecture from # +# mirrorring. Use as space seperated list. # +# Possible values are: # +# alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, # +# mipsel, mips, powerpc, s390, sparc, kfreebsd-i386, kfreebsd-amd64 # +# and source. # +# eg. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc" # +# # +# An unset value will mirror all architectures # +######################################################################## + +######################################################################## +# There should be nothing to edit here, use the config file # +######################################################################## +MIRRORNAME=${MIRRORNAME:-$(hostname -f)} +# Where to put logfiles in +LOGDIR=${LOGDIR:-"${BASEDIR}/log"} +# Our own logfile +LOG=${LOG:-"${LOGDIR}/${NAME}.log"} + +# Where should we put all the mirrored files? +TO=${TO:-"/org/ftp.debian.org/ftp/"} + +# used by log() and error() +PROGRAM=${PROGRAM:-"${NAME}-$(hostname -s)"} + +# Where to send mails about mirroring to? +if [ "x$(hostname -d)x" != "xdebian.orgx" ]; then + # We are not on a debian.org host + MAILTO=${MAILTO:-"root"} +else + # Yay, on a .debian.org host + MAILTO=${MAILTO:-"mirrorlogs@debian.org"} +fi +# Want errors only or every log? +ERRORSONLY=${ERRORSONLY:-"true"} +# Want full logs, ie. including the rsync one? +FULLLOGS=${FULLLOGS:-"false"} + +# How many logfiles to keep +LOGROTATE=${LOGROTATE:-14} + +# Our lockfile +LOCK=${LOCK:-"${TO}/Archive-Update-in-Progress-${MIRRORNAME}"} +# timeout for the lockfile, in case we have bash older than v4 (and no /proc) +LOCKTIMEOUT=${LOCKTIMEOUT:-3600} +# Do we need another rsync run? 
+UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" +# Trace file for mirror stats and checks (make sure we get full hostname) +TRACE=${TRACE:-"project/trace/${MIRRORNAME}"} + +# rsync program +RSYNC=${RSYNC:-rsync} +# Rsync filter rules. Used to protect various files we always want to keep, even if we otherwise delete +# excluded files +RSYNC_FILTER=${RSYNC_FILTER:-"--filter=protect_Archive-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Archive-Update-Required-${MIRRORNAME}"} +# limit I/O bandwidth. Value is KBytes per second, unset or 0 is unlimited +RSYNC_BW=${RSYNC_BW:-0} +# Default rsync options for *every* rsync call +RSYNC_OPTIONS=${RSYNC_OPTIONS:-"-prltvHSB8192 --timeout 3600 --stats ${RSYNC_FILTER}"} +# Options we only use in the first pass, where we do not want packages/sources to fly in yet and don't want to delete files +RSYNC_OPTIONS1=${RSYNC_OPTIONS1:-"--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*"} +# Options for the second pass, where we do want everything, including deletion of old and now unused files +RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"} +# Which rsync share to use on our upstream mirror? +RSYNC_PATH=${RSYNC_PATH:-"ftp"} + +# Now add the bwlimit option. As default is 0 we always add it, rsync interprets +# 0 as unlimited, so this is safe. +RSYNC_OPTIONS="--bwlimit=${RSYNC_BW} ${RSYNC_OPTIONS}" + +# We have no default host to sync from, but will error out if its unset +RSYNC_HOST=${RSYNC_HOST:-""} +# Error out if we have no host to sync from +if [ -z "${RSYNC_HOST}" ]; then + error "Missing a host to mirror from, please set RSYNC_HOST variable in ${BASEDIR}/etc/${NAME}.conf" +fi + +# our username for the rsync share +RSYNC_USER=${RSYNC_USER:-""} +# the password +RSYNC_PASSWORD=${RSYNC_PASSWORD:-""} + +# a possible proxy +RSYNC_PROXY=${RSYNC_PROXY:-""} + +# Do we sync stage1? 
+SYNCSTAGE1=${SYNCSTAGE1:-"false"} +# Do we sync stage2? +SYNCSTAGE2=${SYNCSTAGE2:-"false"} +# Do we sync all? +SYNCALL=${SYNCALL:-"true"} +# Do we have a mhop sync? +SYNCMHOP=${SYNCMHOP:-"false"} +# Do we callback? +SYNCCALLBACK=${SYNCCALLBACK:-"false"} +# If we call back we need some more options defined in the config file. +CALLBACKUSER=${CALLBACKUSER:-"archvsync"} +CALLBACKHOST=${CALLBACKHOST:-"none"} +CALLBACKKEY=${CALLBACKKEY:-"none"} + +# General excludes. Don't list architecture specific stuff here, use ARCH_EXCLUDE for that! +EXCLUDE=${EXCLUDE:-""} + +# The temp directory used by rsync --delay-updates is not +# world-readable remotely. Always exclude it to avoid errors. +EXCLUDE="${EXCLUDE} --exclude .~tmp~/" + +SOURCE_EXCLUDE=${SOURCE_EXCLUDE:-""} +ARCH_EXCLUDE=${ARCH_EXCLUDE:-""} +# Exclude architectures defined in $ARCH_EXCLUDE +for ARCH in ${ARCH_EXCLUDE}; do + EXCLUDE="${EXCLUDE} --exclude binary-${ARCH}/ --exclude installer-${ARCH}/ --exclude Contents-${ARCH}.gz --exclude Contents-${ARCH}.bz2 --exclude Contents-${ARCH}.diff/ --exclude arch-${ARCH}.files --exclude arch-${ARCH}.list.gz --exclude *_${ARCH}.deb --exclude *_${ARCH}.udeb --exclude *_${ARCH}.changes" + if [ "${ARCH}" = "source" ]; then + if [ -z ${SOURCE_EXCLUDE} ]; then + SOURCE_EXCLUDE=" --exclude source/ --exclude *.tar.gz --exclude *.diff.gz --exclude *.tar.bz2 --exclude *.diff.bz2 --exclude *.dsc " + fi + fi +done + +# Hooks +HOOK1=${HOOK1:-""} +HOOK2=${HOOK2:-""} +HOOK3=${HOOK3:-""} +HOOK4=${HOOK4:-""} +HOOK5=${HOOK5:-""} + +# Are we a hub? +HUB=${HUB:-"false"} + +######################################################################## +# Really nothing to see below here. Only code follows. 
# +######################################################################## +######################################################################## + +# Some sane defaults +cd "${BASEDIR}" +umask 022 + +# If we are here for the first time, create the +# destination and the trace directory +mkdir -p "${TO}/project/trace" + +# Used to make sure we will have the archive fully and completly synced before +# we stop, even if we get multiple pushes while this script is running. +# Otherwise we can end up with a half-synced archive: +# - get a push +# - sync, while locked +# - get another push. Of course no extra sync run then happens, we are locked. +# - done. Archive not correctly synced, we don't have all the changes from the second push. +touch "${UPDATEREQUIRED}" + +# Check to see if another sync is in progress +if ! ( set -o noclobber; echo "$$" > "${LOCK}") 2> /dev/null; then + if [ ${BASH_VERSINFO[0]} -gt 3 ] || [ -L /proc/self ]; then + # We have a recent enough bash version, lets do it the easy way, + # the lock will contain the right pid, thanks to $BASHPID + if ! $(kill -0 $(cat ${LOCK}) 2>/dev/null); then + # Process does either not exist or is not owned by us. + echo "$$" > "${LOCK}" + else + echo "Unable to start rsync, lock file still exists, PID $(cat ${LOCK})" + exit 1 + fi + else + # Old bash, means we dont have the right pid in our lockfile + # So take a different way - guess if it is still there by comparing its age. + # Not optimal, but hey. + stamptime=$(date --reference="${LOCK}" +%s) + unixtime=$(date +%s) + difference=$(( $unixtime - $stamptime )) + if [ ${difference} -ge ${LOCKTIMEOUT} ]; then + # Took longer than LOCKTIMEOUT minutes? Assume it broke and take the lock + echo "$$" > "${LOCK}" + else + echo "Unable to start rsync, lock file younger than one hour" + exit 1 + fi + fi +fi + +# When we exit normally we call cleanup on our own. Otherwise we want it called by +# this trap. 
(We can not trap on EXIT, because that is called when the main script +# exits. Which also happens when we background the mainroutine, ie. while we still +# run!) +trap cleanup ERR TERM HUP INT QUIT + +# Start log by redirecting stdout and stderr there and closing stdin +exec >"$LOG" 2>&1 <&- +log "Mirrorsync start" + +# Look who pushed us and note that in the log. +PUSHFROM="${SSH_CONNECTION%%\ *}" +if [ -n "${PUSHFROM}" ]; then + log "We got pushed from ${PUSHFROM}" +fi + +if [ "xtruex" = "x${SYNCCALLBACK}x" ]; then + if [ "xnonex" = "x${CALLBACKHOST}x" ] || [ "xnonex" = "x${CALLBACKKEY}x" ]; then + SYNCCALLBACK="false" + error "We are asked to call back, but we do not know where to and do not have a key, ignoring callback" + fi +fi + +HOOK=( + HOOKNR=1 + HOOKSCR=${HOOK1} +) +hook $HOOK + +# Now, we might want to sync from anonymous too. +# This is that deep in this script so hook1 could, if wanted, change things! +if [ -z ${RSYNC_USER} ]; then + RSYNCPTH="${RSYNC_HOST}" +else + RSYNCPTH="${RSYNC_USER}@${RSYNC_HOST}" +fi + +# Now do the actual mirroring, and run as long as we have an updaterequired file. +export RSYNC_PASSWORD +export RSYNC_PROXY + +while [ -e "${UPDATEREQUIRED}" ]; do + log "Running mirrorsync, update is required, ${UPDATEREQUIRED} exists" + + # if we want stage1 *or* all + if [ "xtruex" = "x${SYNCSTAGE1}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then + while [ -e "${UPDATEREQUIRED}" ]; do + rm -f "${UPDATEREQUIRED}" + log "Running stage1: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS1} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}" + + set +e + # Step one, sync everything except Packages/Releases + ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS1} ${EXCLUDE} ${SOURCE_EXCLUDE} \ + ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >"${LOGDIR}/rsync-${NAME}.log" 2>"${LOGDIR}/rsync-${NAME}.error" + result=$? + set -e + + log "Back from rsync with returncode ${result}" + done + else + # Fake a good resultcode + result=0 + fi # Sync stage 1? 
+ rm -f "${UPDATEREQUIRED}" + + set +e + check_rsync $result "Sync step 1 went wrong, got errorcode ${result}. Logfile: ${LOG}" + GO=$? + set -e + if [ ${GO} -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then + log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now" + elif [ ${GO} -ne 0 ]; then + exit 3 + fi + + HOOK=( + HOOKNR=2 + HOOKSCR=${HOOK2} + ) + hook $HOOK + + # if we want stage2 *or* all + if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then + log "Running stage2: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}" + + set +e + # We are lucky, it worked. Now do step 2 and sync again, this time including + # the packages/releases files + ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} \ + ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >>"${LOGDIR}/rsync-${NAME}.log" 2>>"${LOGDIR}/rsync-${NAME}.error" + result=$? + set -e + + log "Back from rsync with returncode ${result}" + else + # Fake a good resultcode + result=0 + fi # Sync stage 2? + + set +e + check_rsync $result "Sync step 2 went wrong, got errorcode ${result}. Logfile: ${LOG}" + GO=$? + set -e + if [ ${GO} -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then + log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now" + elif [ ${GO} -ne 0 ]; then + exit 4 + fi + + HOOK=( + HOOKNR=3 + HOOKSCR=${HOOK3} + ) + hook $HOOK +done + +# We only update our tracefile when we had a stage2 or an all sync. +# Otherwise we would update it after stage1 already, which is wrong. 
+if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then + if [ -d "$(dirname "${TO}/${TRACE}")" ]; then + LC_ALL=POSIX LANG=POSIX date -u > "${TO}/${TRACE}" + echo "Used ftpsync version: ${VERSION}" >> "${TO}/${TRACE}" + echo "Running on host: $(hostname -f)" >> "${TO}/${TRACE}" + fi +fi + +HOOK=( + HOOKNR=4 + HOOKSCR=${HOOK4} +) +hook $HOOK + +if [ "xtruex" = "x${SYNCCALLBACK}x" ]; then + set +e + callback ${CALLBACKUSER} ${CALLBACKHOST} "${CALLBACKKEY}" + set -e +fi + +# Remove the Archive-Update-in-Progress file before we push our downstreams. +rm -f "${LOCK}" + +if [ x${HUB} = "xtrue" ]; then + # Trigger slave mirrors if we had a push for stage2 or all, or if its mhop + if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ] || [ "xtruex" = "x${SYNCMHOP}x" ]; then + RUNMIRRORARGS="" + if [ -n "${ARCHIVE}" ]; then + # We tell runmirrors about the archive we are running on. + RUNMIRRORARGS="-a ${ARCHIVE}" + fi + # We also tell runmirrors that we are running it from within ftpsync, so it can change + # the way it works with mhop based on that. + RUNMIRRORARGS="${RUNMIRRORARGS} -f" + + if [ "xtruex" = "x${SYNCSTAGE1}x" ]; then + # This is true when we have a mhop sync. A normal multi-stage push sending stage1 will + # not get to this point. + # So if that happens, tell runmirrors we are doing mhop + RUNMIRRORARGS="${RUNMIRRORARGS} -k mhop" + elif [ "xtruex" = "x${SYNCSTAGE2}x" ]; then + RUNMIRRORARGS="${RUNMIRRORARGS} -k stage2" + elif [ "xtruex" = "x${SYNCALL}x" ]; then + RUNMIRRORARGS="${RUNMIRRORARGS} -k all" + fi + log "Trigger slave mirrors using ${RUNMIRRORARGS}" + ${BASEDIR}/bin/runmirrors ${RUNMIRRORARGS} + log "Trigger slave done" + + HOOK=( + HOOKNR=5 + HOOKSCR=${HOOK5} + ) + hook $HOOK + fi +fi + +# All done, lets call cleanup +cleanup diff --git a/bin/pushpdo b/bin/pushpdo new file mode 100755 index 0000000..91df528 --- /dev/null +++ b/bin/pushpdo @@ -0,0 +1,112 @@ +#! 
/bin/bash + +set -e +set -u + +# psuhpdo script for Debian +# +# Copyright (C) 2008 Joerg Jaspert +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +# In case the admin somehow wants to have this script located someplace else, +# he can set BASEDIR, and we will take that. If it is unset we take ${HOME} +BASEDIR=${BASEDIR:-"${HOME}"} + +NAME="`basename $0`" + + +# Read our config file +. "${BASEDIR}/etc/${NAME}.conf" + +# Source our common functions +. "${BASEDIR}/etc/common" + +# Set sane defaults if the configfile didn't do that for us. +# The directory for our logfiles +LOGDIR=${LOGDIR:-"${BASEDIR}/log"} +# Our own logfile +LOG=${LOG:-"${LOGDIR}/${NAME}.log"} +# How many logfiles to keep +LOGROTATE=${LOGROTATE:-14} +# Our mirrorfile +MIRRORS=${MIRRORS:-"${BASEDIR}/etc/${NAME}.mirror"} +# used by log() +PROGRAM=${PROGRAM:-"${NAME}-$(hostname -s)"} +# extra ssh options we might want hostwide +SSH_OPTS=${SSH_OPTS:-""} +# Which ssh key to use? +KEYFILE=${KEYFILE:-".ssh/pushpackages"} +# which path to "mirror" +MIRRORPATH=${MIRRORPATH:-"/org/packages.debian.org/mirror/"} +# where to send mails to +if [ "x$(hostname -s)x" != "x${MIRRORNAME%%.debian.org}x" ]; then + # We are not on a debian.org host + MAILTO=${MAILTO:-"root"} +else + # Yay, on a .debian.org host + MAILTO=${MAILTO:-"mirrorlogs@debian.org"} +fi + +if ! 
[ -f "${BASEDIR}/${KEYFILE}" ]; then + error "SSH Key ${BASEDIR}/${KEYFILE} does not exist" >> ${LOG} + exit 5 +fi + +# Some sane defaults +cd ${BASEDIR} +umask 022 + +# Make sure we have our log and lock directories +mkdir -p "${LOGDIR}" + +trap 'log "Pdopush done" >> ${LOG}; savelog "${LOG}" > /dev/null' EXIT + +log "Pushing pdo mirrors" >> ${LOG} + +# From here on we do *NOT* want to exit on errors. We don't want to +# stop pushing mirrors just because we can't reach one of them. +set +e + + +# Now read our mirrorfile and push the mirrors defined in there. +# We use grep to easily sort out all lines having a # in front of them or are empty. +egrep -v '^[[:space:]]*(#|$)' "${MIRRORS}" | +while read MLNAME MHOSTNAME MUSER MPROTO MKEYFILE; do + # Process the two options that can be left blank in the config + if [ -z ${MPROTO} ]; then + MPROTO=2 + fi + if [ -z ${MKEYFILE} ]; then + MKEYFILE="${BASEDIR}/${KEYFILE}" + fi + # Now, people can do stupid things and leave out the protocol, but + # define a keyfile... + if [ ${MPROTO} -ne 1 ] && [ ${MPROTO} -ne 2 ]; then + error "Need a correct ssh protocol version for ${MLNAME}, skipping" >> ${LOG} + continue + fi + + # And finally, push the mirror + log "Pushing ${MLNAME}" >> ${LOG} + # This needs a limited ssh key on the other side, something like + # no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="rsync --server -vlogDtpr . /srv/mirrors/packages.debian.org/",from="87.106.64.223,2001:8d8:80:11::35d,powell.debian.org" ssh-rsa..... + rsync -e "ssh -i ${MKEYFILE} -${MPROTO} ${SSH_OPTS}" -av --stats "${MIRRORPATH}" ${MUSER}@${MHOSTNAME}:/does/not/matter >"${LOGDIR}/${MLNAME}.log" + log "Pushing ${MLNAME} done" >> ${LOG} + savelog ${LOGDIR}${MLNAME}.log + + set +e +done + +exit 0 diff --git a/bin/run-mirrors b/bin/run-mirrors new file mode 100755 index 0000000..ec43009 --- /dev/null +++ b/bin/run-mirrors @@ -0,0 +1,43 @@ +#! /bin/sh + +set -e + +## Make sure we're running as the right user. 
+case "$USER" in + mirror) ;; + *) exec userv -fstdin=/dev/null mirror run ;; +esac + +## Set up a plausible environment. +HOME=/mnt/ftp/mirror-admin; export HOME; cd +PATH=$HOME/bin:/usr/bin:/usr/sbin:/bin:/sbin:/usr/local/bin:/usr/local/sbin +export PATH +umask 002 + +## Make sure we're running with a lock file. +case "${MIRROR_LOCKED-nil}" in + nil) exec env MIRROR_LOCKED=t locking -f var/mirror.lock "$0" "$@" ;; +esac + +## Before we start, rotate the logs. (Doing things this way means that we +## can be sure we don't lose new logs, even if the log rotation goes +## completely mental. +logrotate -s var/logrotate.state etc/logrotate.conf + +## Let SIGINT take out the children only. +trap "" INT + +## Now do the various mirroring things. +for file in $(run-parts --list etc/mirrors.d); do + [ -x "$file" ] || continue + base=${file##*/}; base=${base#[0-9]*-} + ( + echo + echo "***--------------------------------------------------" + echo "*** Running $base at $(date +%Y-%m-%dT%H:%M:%S)" + echo + set +e; (trap - INT; exec "$file"); rc=$?; set -e + echo + echo "*** Finished $(date +%Y-%m-%dT%H:%M:%S); rc = $rc" + ) >>log/$base.log 2>&1 +done diff --git a/bin/runmirrors b/bin/runmirrors new file mode 100755 index 0000000..0758c4d --- /dev/null +++ b/bin/runmirrors @@ -0,0 +1,286 @@ +#! /bin/bash + +set -e +set -u + +# runmirrors script for Debian +# Based losely on existing scripts, written by an unknown number of +# different people over the years. +# +# Copyright (C) 2008, 2009 Joerg Jaspert +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +# In case the admin somehow wants to have this script located someplace else, +# he can set BASEDIR, and we will take that. If it is unset we take ${HOME} +BASEDIR=${BASEDIR:-"${HOME}"} + +NAME="$(basename $0)" + +HELP="$0, (C) 2008, 2009 by Joerg Jaspert \n +Usage:\n\n + +1.) a single parameter with NO leading -.\n +\t This will will then be used as the addition for our configfile. Ie. \`$0 security\` will\n +\t have us look for ${NAME}-security.{conf,mirror} files.\n\n + +2.) using getopt style parameters:\n +\t -a [NAME] - Same as 1.) above, used for the config files. Default empty.\n +\t -k [TYPE] - Type of push. all, stage2, mhop. Default mhop.\n +\t -f - Run from within the mirrorscript ftpsync. Don't use from commandline!\n +\t -h - Print this help and exit +" +# If we got options, lets see if we use newstyle options, or oldstyle. If oldstyle +# it will not start with a -. If we find oldstyle we assume its only one, the config +# name we run on. +if [ $# -gt 0 ]; then + if [ "x${1:0:1}x" != "x-x" ]; then + # Yes, does not start with a -, so use it for the config name. + CONF=${1:-""} + if [ -n "${CONF}" ]; then + NAME="${NAME}-${CONF}" + fi + else + # Yeah well, new style, starting with - for getopts + while getopts ':a:k:fh' OPTION ; do + case $OPTION in + a) CONF="${OPTARG}" + if [ -n "${CONF}" ]; then + NAME="${NAME}-${CONF}" + fi + ;; + k) PUSHKIND="${OPTARG}" + ;; + f) FROMFTPSYNC="true" + ;; + h) echo -e $HELP + exit 0 + ;; + + *) echo "Invalid usage" + echo -e $HELP + exit 1 + ;; + esac + done + fi +fi +# Make sure the values are always defined, even if there was no commandline option +# for them +# Default config is empty +CONF=${CONF:-""} + +# Set the default to all, if we didnt get told about it. Currently +# valid: all - normal push. 
mhop - multi-hop multi-stage push, this is stage1, +# stage2 - staged push, second phase. Default is mhop. +PUSHKIND=${PUSHKIND:-"mhop"} + +# If we are pushed from within ftpsync. Default false. +FROMFTPSYNC=${FROMFTPSYNC:-"false"} + +######################################################################## +# Read our config file +. "${BASEDIR}/etc/${NAME}.conf" + +# Source our common functions +. "${BASEDIR}/etc/common" + +# Set sane defaults if the configfile didn't do that for us. +# The directory for our logfiles +LOGDIR=${LOGDIR:-"${BASEDIR}/log"} +# Our own logfile +LOG=${LOG:-"${LOGDIR}/${NAME}.log"} +# Our lockfile directory +LOCKDIR=${LOCKDIR:-"${BASEDIR}/locks"} +# How many logfiles to keep +LOGROTATE=${LOGROTATE:-14} +# Our mirrorfile +MIRRORS=${MIRRORS:-"${BASEDIR}/etc/${NAME}.mirror"} +# used by log() +PROGRAM=${PROGRAM:-"${NAME}-$(hostname -s)"} +# extra ssh options we might want hostwide +SSH_OPTS=${SSH_OPTS:-"-o StrictHostKeyChecking=no"} +# Whats our archive name? We will also tell our leafs about it +PUSHARCHIVE=${PUSHARCHIVE:-"${CONF}"} +# How long to wait for mirrors to do stage1 if we have multi-stage syncing +PUSHDELAY=${PUSHDELAY:-600} +# Which ssh key to use? +KEYFILE=${KEYFILE:-".ssh/pushmirror"} +# where to send mails to +if [ "x$(hostname -d)x" != "xdebian.orgx" ]; then + # We are not on a debian.org host + MAILTO=${MAILTO:-"root"} +else + # Yay, on a .debian.org host + MAILTO=${MAILTO:-"mirrorlogs@debian.org"} +fi + +if ! [ -f "${BASEDIR}/${KEYFILE}" ]; then + error "SSH Key ${BASEDIR}/${KEYFILE} does not exist" >> "${LOG}" + exit 5 +fi + +# Hooks +HOOK1=${HOOK1:-""} +HOOK2=${HOOK2:-""} +HOOK3=${HOOK3:-""} + +######################################################################## + +# Some sane defaults +cd "${BASEDIR}" +umask 022 + +# Make sure we have our log and lock directories +mkdir -p "${LOGDIR}" +mkdir -p "${LOCKDIR}" + +trap 'log "Mirrorpush done" >> "${LOG}"; savelog "${LOG}" > /dev/null' EXIT + +log "Pushing leaf mirrors. 
Inside ftpsync: ${FROMFTPSYNC}. Pushkind: ${PUSHKIND}" >> "${LOG}" + +HOOK=( + HOOKNR=1 + HOOKSCR=${HOOK1} +) +hook $HOOK + +# From here on we do *NOT* want to exit on errors. We don't want to +# stop pushing mirrors just because we can't reach one of them. +set +e + +# Built up our list of 2-stage mirrors. +PUSHLOCKS="" +PUSHLOCKS=$(get2stage) + +# In case we have it - remove. It is used to synchronize multi-stage mirroring +rm -f "${LOCKDIR}/all_stage1" + +# Now read our mirrorfile and push the mirrors defined in there. +# We use grep to easily sort out all lines having a # in front of them or are empty. +egrep -v '^[[:space:]]*(#|$)' "${MIRRORS}" | +while read MTYPE MLNAME MHOSTNAME MUSER MSSHOPT; do + if [ "x${MTYPE}x" = "xDELAYx" ]; then + # We should wait a bit. + if [ -z ${MLNAME} ]; then + MLNAME=600 + fi + log "Delay of ${MLNAME} requested, sleeping" >> "${LOG}" + sleep ${MLNAME} + continue + fi + + # If we are told we have a mhop sync to do and are called from within ftpsync, + # we will only look at staged/mhop entries and ignore the rest. + if [ "x${PUSHKIND}x" = "xmhopx" ] && [ "x${FROMFTPSYNC}x" = "xtruex" ]; then + if [ "x${MTYPE}x" != "xstagedx" ] && [ "x${MTYPE}x" != "xmhopx" ]; then + continue + fi + fi + + # Now, MSSHOPT may start with a -. In that case the whole rest of the line is taken + # as a set of options to give to ssh, we pass it without doing anything with it. + # If it starts with a 1 or 2 then it will tell us about the ssh protocol version to use, + # and also means we look if there is one value more after a space. That value would then + # be the ssh keyfile we use with -i. That gives us full flexibility for all + # ssh options but doesn't destroy backwards compatibility. + # If it is empty we assume proto 2 and the default keyfile. + # + # There is one bug in here. We will give out the master keyfile, even if there is a + # "-i /bla/bla" in the options. ssh stuffs them together and presents two keys to the + # target server. 
In the case both keys do some actions- the first one presented wins. + # And this might not be what one wants. + # + # The only sane way to go around this, i think, is by dropping backward compatibility. + # Which I don't really like. + if [ -n "${MSSHOPT}" ]; then + # So its not empty, lets check if it starts with a - and as such is a "new-style" + # ssh options set. + if [ "x${MSSHOPT:0:1}x" = "x-x" ]; then + # Yes we start with a - + SSHOPT="${MSSHOPT}" + MPROTO="99" + MKEYFILE="${BASEDIR}/${KEYFILE}" + elif [ "x${MSSHOPT:0:1}x" = "x1x" ] || [ "x${MSSHOPT:0:1}x" = "x2x" ]; then + # We do seem to have oldstyle options here. + MPROTO=${MSSHOPT:0:1} + MKEYFILE=${MSSHOPT:2} # skip the protocol digit and the space that separates it from the keyfile + SSHOPT="" + else + error "I don't know what is configured for mirror ${MLNAME}" >> "${LOG}" + continue + fi + else + MPROTO=2 + MKEYFILE="${BASEDIR}/${KEYFILE}" + SSHOPT="" + fi + + # Build our array + SIGNAL_OPTS=( + MIRROR="${MLNAME}" + HOSTNAME="${MHOSTNAME}" + USERNAME="${MUSER}" + SSHPROTO="${MPROTO}" + SSHKEY="${MKEYFILE}" + SSHOPTS="${SSHOPT/ /#}" + PUSHLOCKOWN="${LOCKDIR}/${MLNAME}.stage1" + PUSHTYPE="${MTYPE}" + PUSHARCHIVE=${PUSHARCHIVE} + PUSHKIND=${PUSHKIND} + FROMFTPSYNC=${FROMFTPSYNC} + ) + + # And finally, push the mirror + log "Trigger ${MLNAME}" >> "${LOG}" + signal "${SIGNAL_OPTS}" & + log "Trigger for ${MLNAME} done" >> "${LOG}" + + HOOK=( + HOOKNR=2 + HOOKSCR=${HOOK2} + ) + hook $HOOK + set +e +done + +# If we are run from within ftpsync *and* have an mhop push to send on, we have +# to wait until the push is gone through and they all returned, or we will exit +# much too early. +# As the signal routine touches $LOCKDIR/all_stage1 when all are done, its +# easy enough just to wait for that to appear. Of course we do the same game +# with PUSHDELAY to not wait forever. 
+if [ "xtruex" = "x${FROMFTPSYNC}x" ] && [ "xmhopx" = "x${PUSHKIND}x" ]; then + tries=0 + # We do not wait forever + while [ ${tries} -lt ${PUSHDELAY} ]; do + if [ -f "${LOCKDIR}/all_stage1" ]; then + break + fi + tries=$((tries + 5)) + sleep 5 + done + + if [ ${tries} -ge ${PUSHDELAY} ]; then + error "Failed to wait for our mirrors when sending mhop push down." >> "${LOG}" + fi +fi + +HOOK=( + HOOKNR=3 + HOOKSCR=${HOOK3} +) +hook $HOOK + +exit 0 diff --git a/bin/typicalsync b/bin/typicalsync new file mode 100755 index 0000000..dcd57e1 --- /dev/null +++ b/bin/typicalsync @@ -0,0 +1,168 @@ +#!/usr/bin/perl -wT + +# Copyright (c) 2006 Anthony Towns +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +use strict; +use Fcntl ':flock'; +use File::Find; +use POSIX qw(strftime); + +# configuration: + +my $local_dir = "/srv/ftp.debian.org/mirror"; +my $rsync_host = undef; #"merkel.debian.org"; +my $rsync_dir = undef; #"debian"; + +my $dest = "/srv/ftp.debian.org/rsync/typical"; +my $max_del = 1000; + +$ENV{"PATH"} = "/bin:/usr/bin"; + +# program + +my $hostname = `/bin/hostname -f`; +die "bad hostname" unless $hostname =~ m/^([a-zA-Z0-9._-]+)/; +$hostname = $1; + +my $lockfile = "./Archive-Update-in-Progress-$hostname"; + +unless (open LKFILE, "> $dest/$lockfile" and flock(LKFILE, LOCK_EX)) { + print "$hostname is unable to start sync, lock file exists\n"; + exit(1); +} + +if (defined $rsync_host && defined $rsync_dir) { + system("rsync --links --hard-links --times --verbose --recursive" + ." 
--delay-updates --files-from :indices/files/typical.files" + ." rsync://$rsync_host/$rsync_dir/ $dest/"); +} else { + open FILELIST, "< $local_dir/indices/files/typical.files" + or die "typical.files index not found"; + while (<FILELIST>) { # one path per line, relative to $local_dir + chomp; + m/^(.*)$/; $_ = $1; # untaint the path (the script runs under -T) + my @l = lstat("$local_dir/$_"); + next unless (@l); + + if (-l _) { + my $lpath = readlink("$local_dir/$_"); + $lpath =~ m/^(.*)$/; $lpath = $1; + if (-l "$dest/$_") { + next if ($lpath eq readlink("$dest/$_")); + } + + unless (mk_dirname_as_dirs($dest, $_)) { + print "E: couldn't create path for $_\n"; + next; + } + + if (-d "$dest/$_") { + rename "$dest/$_", "$dest/$_.remove" or print "E: couldn't rename old dir $_ out of the way\n"; + } elsif (-e "$dest/$_") { + unlink("$dest/$_") or print "E: couldn't unlink $_\n"; + } + symlink($lpath, "$dest/$_") or print "E: couldn't create $_ as symlink to $lpath\n"; + next; + } + + next if (-d _); + + unless (mk_dirname_as_dirs($dest, $_)) { + print "E: couldn't create path for $_\n"; + next; + } + + my @d = lstat("$dest/$_"); + if (@d) { + if (-d _) { + rename("$dest/$_", "$dest/$_.remove") or print "E: couldn't rename old dir $_ out of the way\n"; + } else { + next if (@l and @d and $l[0] == $d[0] and $l[1] == $d[1]); + #next if (@l and @d and $l[7] == $d[7]); + print "I: updating $_\n"; + unlink("$dest/$_"); + } + } + + link("$local_dir/$_", "$dest/$_") or print "E: couldn't link $_\n"; + } + close(FILELIST); +} + +print "Files synced, now deleting any unnecessary files\n"; + +my %expected_files = (); +open FILES, "< $dest/indices/files/typical.files" + or die "typical.files index not found"; +while (<FILES>) { # remember every listed path so wanted() keeps it + chomp; + $expected_files{$_} = 1; +} +close(FILES); + +chdir($dest); + +my $del_count = 0; +my $last = ''; +finddepth({wanted => \&wanted, no_chdir => 1}, "."); + +open TRACE, "> $dest/project/trace/$hostname" or die "couldn't open trace"; +print TRACE strftime("%a %b %e %H:%M:%S UTC %Y", gmtime) . 
"\n"; +close TRACE; + +close LKFILE; +unlink("$dest/$lockfile"); +exit(0); + +sub wanted { + my ($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_); + if (-d _) { + if (substr($last, 0, length($_) + 1) ne "$_/") { + print "Deleting empty directory: $_\n"; + $_ =~ m/^(.*)$/; # untaint via the capture without overwriting $_ + my $f = $1; + rmdir($f); + } else { + $last = $_; + } + } elsif ($_ =~ m|^\./project/trace/| or $_ eq $lockfile) { + $last = $_; + } elsif (defined $expected_files{$_}) { + $last = $_; + } elsif ($del_count < $max_del) { + $del_count++; + print "Deleting file: $_\n"; + $_ =~ m/^(.*)$/; # untaint via the capture without overwriting $_ + my $f = $1; + unlink($f); + } +} + +sub mk_dirname_as_dirs { + my ($base, $file) = @_; + while ($file =~ m,^/*([^/]+)/+([^/].*)$,) { + $file = $2; + $base = "$base/$1"; + my @blah = lstat($base); + if (!@blah) { + mkdir($base, 0777) or return 0; # report failure so the caller can skip this file + } elsif (-l _ or ! -d _) { + print "SHOULD BE A DIRECTORY: $base\n"; + unlink($base); + mkdir($base, 0777) or return 0; # report failure so the caller can skip this file + } + } + 1; +} + + diff --git a/bin/udh b/bin/udh new file mode 100755 index 0000000..440e1d9 --- /dev/null +++ b/bin/udh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -e + +unset LC_CTYPE +LANG=C +HOST=`hostname -f` + +cd ${HOME}/archvsync +git pull + +cd ${HOME} +~/archvsync/bin/dircombine . . archvsync/ >/dev/null 2>&1 diff --git a/bin/websync b/bin/websync new file mode 100755 index 0000000..f46e3ec --- /dev/null +++ b/bin/websync @@ -0,0 +1,304 @@ +#! /bin/bash +# No, we can not deal with sh alone. + +set -e +set -u +# ERR traps should be inherited from functions too. (And command +# substitutions and subshells and whatnot, but for us the function is +# the important part here) +set -E + +# websync script for Debian +# Based losely on the old websync written by an +# unknown number of different people over the years and ftpsync. +# +# Copyright (C) 2008,2009 Joerg Jaspert +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +# In case the admin somehow wants to have this script located someplace else, +# he can set BASEDIR, and we will take that. If it is unset we take ${HOME} +# How the admin sets this isn't our place to deal with. One could use a wrapper +# for that. Or pam_env. Or whatever fits in the local setup. :) +BASEDIR=${BASEDIR:-"${HOME}"} + +# Script version. DO NOT CHANGE, *unless* you change the master copy maintained +# by Joerg Jaspert and the Debian mirroradm group. +# This is used to track which mirror is using which script version. +VERSION="0815" + +# Source our common functions +. "${BASEDIR}/etc/common" + +######################################################################## +######################################################################## +## functions ## +######################################################################## +######################################################################## +# All the stuff we want to do when we exit, no matter where +cleanup() { + trap - ERR TERM HUP INT QUIT EXIT + # all done. Mail the log, exit. 
+ log "Mirrorsync done"; + if [ -n "${MAILTO}" ]; then + # In case rsync had something on stderr + if [ -s "${LOGDIR}/rsync-${NAME}.error" ]; then + mail -e -s "[${PROGRAM}@$(hostname -s)] ($$) rsync ERROR on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} < "${LOGDIR}/rsync-${NAME}.error" + fi + if [ "x${ERRORSONLY}x" = "xfalsex" ]; then + # And the normal log + MAILFILES="${LOG}" + if [ "x${FULLLOGS}x" = "xtruex" ]; then + # Someone wants full logs including rsync + MAILFILES="${MAILFILES} ${LOGDIR}/rsync-${NAME}.log" + fi + cat ${MAILFILES} | mail -e -s "[${PROGRAM}@$(hostname -s)] web sync finished on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} + fi + fi + + savelog "${LOGDIR}/rsync-${NAME}.log" + savelog "${LOGDIR}/rsync-${NAME}.error" + savelog "$LOG" > /dev/null + + rm -f "${LOCK}" +} + + +# Check rsyncs return value +check_rsync() { + + ret=$1 + msg=$2 + + # 24 - vanished source files. Ignored, that should be the target of $UPDATEREQUIRED + # and us re-running. If it's not, uplink is broken anyways. + case "${ret}" in + 0) return 0;; + 24) return 0;; + 23) return 2;; + 30) return 2;; + *) + error "ERROR: ${msg}" + return 1 + ;; + esac +} + +######################################################################## +######################################################################## + +# As what are we called? +NAME="`basename $0`" + +# Now source the config. +. "${BASEDIR}/etc/${NAME}.conf" + +######################################################################## +# Config options go here. Feel free to overwrite them in the config # +# file if you need to. # +# On debian.org machines the defaults should be ok. 
# +######################################################################## + +######################################################################## +# There should be nothing to edit here, use the config file # +######################################################################## +MIRRORNAME=${MIRRORNAME:-`hostname -f`} +# Where to put logfiles in +LOGDIR=${LOGDIR:-"${BASEDIR}/log"} +# Our own logfile +LOG=${LOG:-"${LOGDIR}/${NAME}.log"} + +# Where should we put all the mirrored files? +TO=${TO:-"/org/www.debian.org/www"} + +# used by log() and error() +PROGRAM=${PROGRAM:-"${NAME}-$(hostname -s)"} + +# Where to send mails about mirroring to? +if [ "x$(hostname -d)x" != "xdebian.orgx" ]; then + # We are not on a debian.org host + MAILTO=${MAILTO:-"root"} +else + # Yay, on a .debian.org host + MAILTO=${MAILTO:-"mirrorlogs@debian.org"} +fi +# Want errors only or every log? +ERRORSONLY=${ERRORSONLY:-"true"} +# Want full logs, ie. including the rsync one? +FULLLOGS=${FULLLOGS:-"false"} + +# How many logfiles to keep +LOGROTATE=${LOGROTATE:-14} + +# Our lockfile +LOCK=${LOCK:-"${TO}/Website-Update-in-Progress-${MIRRORNAME}"} +# Do we need another rsync run? +UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}" +# Trace file for mirror stats and checks (make sure we get full hostname) +TRACE=${TRACE:-".project/trace/${MIRRORNAME}"} + +# rsync program +RSYNC=${RSYNC:-rsync} +# Rsync filter rules. 
Used to protect various files we always want to keep, even if we otherwise delete +# excluded files +RSYNC_FILTER=${RSYNC_FILTER:-"--filter=protect_Website-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Website-Update-Required-${MIRRORNAME}"} +# Default rsync options for *every* rsync call +RSYNC_OPTIONS=${RSYNC_OPTIONS:-"-prltvHSB8192 --timeout 3600 --stats ${RSYNC_FILTER}"} +RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"} +# Which rsync share to use on our upstream mirror? +RSYNC_PATH=${RSYNC_PATH:-"web.debian.org"} + +# our username for the rsync share +RSYNC_USER=${RSYNC_USER:-""} +# the password +RSYNC_PASSWORD=${RSYNC_PASSWORD:-""} + +# a possible proxy +RSYNC_PROXY=${RSYNC_PROXY:-""} + +# General excludes. +EXCLUDE=${EXCLUDE:-"--exclude ${HOSTNAME}"} + +# The temp directory used by rsync --delay-updates is not +# world-readable remotely. Always exclude it to avoid errors. +EXCLUDE="${EXCLUDE} --exclude .~tmp~/" + +# And site specific excludes, by default its the sponsor stuff that should be local to all (except templates) +SITE_FILTER=${SITE_FILTER:-"--include sponsor.deb.* --exclude sponsor_img.* --exclude sponsor.html --exclude sponsor.*.html --filter=protect_sponsor_img.* --filter=protect_sponsor.html --filter=protect_sponsor.*.html"} + +# Hooks +HOOK1=${HOOK1:-""} +HOOK2=${HOOK2:-""} +HOOK3=${HOOK3:-""} +HOOK4=${HOOK4:-""} + +# Are we a hub? +HUB=${HUB:-"false"} + +# Some sane defaults +cd "${BASEDIR}" +umask 022 + +# If we are here for the first time, create the +# destination and the trace directory +mkdir -p "${TO}/.project/trace" + +# Used to make sure we will have the archive fully and completly synced before +# we stop, even if we get multiple pushes while this script is running. +# Otherwise we can end up with a half-synced archive: +# - get a push +# - sync, while locked +# - get another push. Of course no extra sync run then happens, we are locked. 
+# - done. Archive not correctly synced, we don't have all the changes from the second push. +touch "${UPDATEREQUIRED}" + +# Check to see if another sync is in progress +if ! ( set -o noclobber; echo "$$" > "${LOCK}") 2> /dev/null; then + if ! $(kill -0 $(cat ${LOCK}) 2>/dev/null); then + # Process does either not exist or is not owned by us. + echo "$$" > "${LOCK}" + else + echo "Unable to start rsync, lock file still exists, PID $(cat ${LOCK})" + exit 1 + fi +fi + +trap cleanup EXIT ERR TERM HUP INT QUIT + +# Start log by redirecting everything there. +exec >"$LOG" 2>&1 "${LOGDIR}/rsync-${NAME}.log" 2>"${LOGDIR}/rsync-${NAME}.error" + result=$? + set -e + + log "Back from rsync with returncode ${result}" + + set +e + check_rsync $result "Sync went wrong, got errorcode ${result}. Logfile: ${LOG}" + GO=$? + set -e + + if [ ${GO} -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then + log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now" + elif [ ${GO} -ne 0 ]; then + exit 3 + fi + + HOOK=( + HOOKNR=2 + HOOKSCR=${HOOK2} + ) + hook $HOOK + +done + +mkdir -p "${TO}/.project/trace" +LC_ALL=POSIX LANG=POSIX date -u > "${TO}/${TRACE}" +echo "Used websync version: ${VERSION}" >> "${TO}/${TRACE}" +echo "Running on host: $(hostname -f)" >> "${TO}/${TRACE}" + +HOOK=( + HOOKNR=3 + HOOKSCR=${HOOK3} +) +hook $HOOK + +if [ x${HUB} = "xtrue" ]; then + log "Trigger slave mirrors" + ${BASEDIR}/bin/runmirrors "websync" + log "Trigger slave done" + + HOOK=( + HOOKNR=4 + HOOKSCR=${HOOK4} + ) + hook $HOOK +fi + +# All done, rest is done by cleanup hook. diff --git a/doc/ftpsync/README b/doc/ftpsync/README new file mode 100644 index 0000000..e85af02 --- /dev/null +++ b/doc/ftpsync/README @@ -0,0 +1,257 @@ +Archvsync +========= + +This is the central repository for the Debian mirror scripts. 
The scripts +in this repository are written for the purposes of maintaining a Debian +archive mirror (and shortly, a Debian bug mirror), but they should be +easily generalizable. + + +Currently the following scripts are available: + + * ftpsync - Used to sync an archive using rsync + * runmirrors - Used to notify leaf nodes of available updates + * dircombine - Internal script to manage the mirror user's $HOME + on debian.org machines + * typicalsync - Generates a typical Debian mirror + * udh - We are lazy, just a shorthand to avoid typing the + commands, ignore... :) + +Usage +===== +For impatient people, short usage instruction: + + - Create a dedicated user for the whole mirror. + - Create a separate directory for the mirror, writeable by the new user. + - Place the ftpsync script in the mirror user's $HOME/bin (or just $HOME) + - Place the ftpsync.conf.sample into $HOME/etc as ftpsync.conf and edit + it to suit your system. You should at the very least change the TO= + and RSYNC_HOST lines. + - Create $HOME/log (or wherever you point $LOGDIR to) + - Setup the .ssh/authorized_keys for the mirror user and place the public key of + your upstream mirror into it. Preface it with +no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="~/bin/ftpsync",from="IPADDRESS" + and replace IPADDRESS with the IP address of your upstream mirror. + - You are finished + +In order to receive different pushes or syncs from different archives, +name the config file ftpsync-$ARCHIVE.conf and call the ftpsync script +with the commandline "sync:archive:$ARCHIVE". Replace $ARCHIVE with a +sensible value. If your upstream mirror pushes you using runmirrors +bundled together with this sync script, you do not need to add the +"sync:archive" parameter to the commandline, the scripts deal with it +automatically. 
+ + + +Debian mirror script minimum requirements +========================================= +As always, you may use whatever scripts you want for your Debian mirror, +but we *STRONGLY* recommend you to not invent your own. However, if you +want to be listed as a mirror it *MUST* support the following minimal +functionality: + + - Must perform a 2-stage sync + The archive mirroring must be done in 2 stages. The first rsync run + must ignore the index files. The correct exclude options for the + first rsync run are: + --exclude Packages* --exclude Sources* --exclude Release* --exclude ls-lR* + The first stage must not delete any files. + + The second stage should then transfer the above excluded files and + delete files that no longer belong on the mirror. + + Rationale: If archive mirroring is done in a single stage, there will be + periods of time during which the index files will reference files not + yet mirrored. + + - Must not ignore pushes whil(e|st) running. + If a push is received during a run of the mirror sync, it MUST NOT + be ignored. The whole synchronization process must be rerun. + + Rationale: Most implementations of Debian mirror scripts will leave the + mirror in an inconsistent state in the event of a second push being + received while the first sync is still running. It is likely that in + the near future, the frequency of pushes will increase. + + - Should understand multi-stage pushes. + The script should parse the arguments it gets via ssh, and if they + contain a hint to only sync stage1 or stage2, then ONLY those steps + SHOULD be performed. + + Rationale: This enables us to coordinate the timing of the first + and second stage pushes and minimize the time during which the + archive is desynchronized. This is especially important for mirrors + that are involved in a round robin or GeoDNS setup. + + The minimum arguments the script has to understand are: + sync:stage1 Only sync stage1 + sync:stage2 Only sync stage2 + sync:all Do everything. 
Default if none of stage1/2 are + present. + There are more possible arguments, for a complete list see the + ftpsync script in our git repository. + + + +ftpsync +======= + +This script is based on the old anonftpsync script. It has been rewritten +to add flexibility and fix a number of outstanding issues. + +Some of the advantages of the new version are: + - Nearly every aspect is configurable + - Correct support for multiple pushes + - Support for multi-stage archive synchronisations + - Support for hook scripts at various points + - Support for multiple archives, even if they are pushed using one ssh key + - Support for multi-hop, multi-stage archive synchronisations + + Correct support for multiple pushes + ----------------------------------- + When the script receives a second push while it is running and syncing + the archive it won't ignore it. Instead it will rerun the + synchronisation step to ensure the archive is correctly synchronised. + + Scripts that fail to do that risk ending up with an inconsistent archive. + + + Can do multi-stage archive synchronisations + ------------------------------------------- + The script can be told to only perform the first or second stage of the + archive synchronisation. + + This enables us to send all the binary packages and sources to a + number of mirrors, and then tell all of them to sync the + Packages/Release files at once. This will keep the timeframe in which + the mirrors are out of sync very small and will greatly help things like + DNS RR entries or even the planned GeoDNS setup. + + + Multi-hop, multi-stage archive synchronisations + ----------------------------------------------- + The script can be told to perform a multi-hop multi-stage archive + synchronisation. + + This is basically the same as the multi-stage synchronisation + explained above, but enables the downstream mirror to push its own + staged/multi-hop downstreams before returning. 
This has the same + advantage as the multi-stage synchronisation but allows us to do + this over multiple levels of mirrors. (Imagine one push going from + Europe to Australia, where then locally 3 others get updated before + stage2 is sent out, instead of transferring the data 4 times from Europe to + Australia just to have them all updated near instantly.) + + + Can run hook scripts + -------------------- + ftpsync currently allows 5 hook scripts to run at various points of the + mirror sync run. + + Hook1: After lock is acquired, before first rsync + Hook2: After first rsync, if successful + Hook3: After second rsync, if successful + Hook4: Right before leaf mirror triggering + Hook5: After leaf mirror trigger (only if we have slave mirrors; HUB=true) + + Note that Hook3 and Hook4 are likely to be called directly after each other. + The difference is that Hook3 is called *every* time the second rsync + succeeds even if the mirroring needs to re-run due to a second push. + Hook4 is only executed if mirroring is completed. + + + Support for multiple archives, even if they are pushed using one ssh key + ------------------------------------------------------------------------ + If you get multiple archives from your upstream mirror (say Debian, + Debian-Backports and Volatile), previously you had to use 3 different ssh + keys to be able to automagically synchronize them. This script can do it + all with just one key, if your upstream mirror tells you which archive. + See "Commandline/SSH options" below for further details. + + +For details of all available options, please see the extensive documentation +in the sample configuration file. + + +Commandline/SSH options +======================= +Script options may be set either on the local command line, or passed by +specifying an ssh "command". Local commandline options always have +precedence over the SSH_ORIGINAL_COMMAND ones. + +Currently this script understands the options listed below. 
To make them +take effect they MUST be prepended by "sync:". + +Option Behaviour +stage1 Only do stage1 sync +stage2 Only do stage2 sync +all Do a complete sync (default) +mhop Do a multi-hop sync +archive:foo Sync archive foo (if the file $HOME/etc/ftpsync-foo.conf + exists and is configured) +callback Call back when done (needs proper ssh setup for this to + work). It will always use the "command" callback:$HOSTNAME + where $HOSTNAME is the one defined in config and + will happen before slave mirrors are triggered. + +So, to get the script to sync all of the archive behind bpo and call back when +it is complete, use an upstream trigger of +ssh $USER@$HOST sync:all sync:archive:bpo sync:callback + + +Mirror trace files +================== +Every mirror needs to have a 'trace' file under project/trace. +The file format is as follows: + + The filename has to be the full hostname (eg. hostname -f), or in the + case of a mirror participating in RR DNS (where users will never use + the hostname) the name of the DNS RR entry, eg. security.debian.org + for the security rotation) + + The content has (no leading spaces): + Sat Nov 8 13:20:22 UTC 2008 + Used ftpsync version: 42 + Running on host: steffani.debian.org + + First line: Output of date -u + Second line: Freeform text containing the program name and version + Third line: Text "Running on host: " followed by hostname -f + + The third line MUST NOT be the DNS RR name, even if the mirror is part + of it. It MUST BE the hosts own name. This is in contrast to the filename, + which SHOULD be the DNS RR name. + + +runmirrors +========== +This script is used to tell leaf mirrors that it is time to synchronize +their copy of the archive. This is done by parsing a mirror list and +using ssh to "push" the leaf nodes. You can read much more about the +principle behind the push at [1], essentially it tells the receiving +end to run a pre-defined script. 
As the whole setup is extremely limited +and the ssh key is not usable for anything else than the pre-defined +script this is the most secure method for such an action. + +This script supports two types of pushes: The normal single stage push, +as well as the newer multi-stage push. + +The normal push, as described above, will simply push the leaf node and +then go on with the other nodes. + +The multi-staged push first pushes a mirror and tells it to only do a +stage1 sync run. Then it waits for the mirror (and all others being pushed +in the same run) to finish that run, before it tells all of the staged +mirrors to do the stage2 sync. + +This way you can do a nearly-simultaneous update of multiple hosts. +This is useful in situations where periods of desynchronization should +be kept as small as possible. Examples of scenarios where this might be +useful include multiple hosts in a DNS Round Robin entry. + +For details on the mirror list please see the documented +runmirrors.mirror.sample file. + + +[1] http://blog.ganneff.de/blog/2007/12/29/ssh-triggers.html diff --git a/doc/ftpsync/ftpsync.conf.sample b/doc/ftpsync/ftpsync.conf.sample new file mode 100644 index 0000000..df490f1 --- /dev/null +++ b/doc/ftpsync/ftpsync.conf.sample @@ -0,0 +1,148 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the ftpsync mirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +#MIRRORNAME=`hostname -f` + +## Destination of the mirrored files. Should be an empty directory. 
+## CAREFUL, this directory will contain the mirror. Everything else +## that might have happened to be in there WILL BE GONE after the mirror sync! +#TO="/org/ftp.debian.org/ftp/" + +## The upstream name of the rsync share. +#RSYNC_PATH="ftp" + +## The host we mirror from +#RSYNC_HOST=some.mirror.debian.org + +## In case we need a user to access the rsync share at our upstream host +#RSYNC_USER= + +## If we need a user we also need a password +#RSYNC_PASSWORD= + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the ftpsync script depending on the way it +## is called. See README for a description of the multi-archive capability +## and better always include ${NAME} in this path. +#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude statements here. +## See rsync(1) for the exact syntax, these are passed to rsync as written here. +## DO NOT TRY TO EXCLUDE ARCHITECTURES OR SUITES WITH THIS, IT WILL NOT WORK! +#EXCLUDE="" + +## If you do want to exclude an architecture, this is for you. +## Use a space-separated list. 
+## Possible values are: +## alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, kfreebsd-amd64, +## kfreebsd-i386, m68k, mipsel, mips, powerpc, s390, sh, sparc and source +## eg. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc" +## An unset value will mirror all architectures (default!) +#ARCH_EXCLUDE="" + +## Do we have leaf mirror to signal we are done and they should sync? +## If so set it to true and make sure you configure runmirrors.mirrors +## and runmirrors.conf for your need. +#HUB=false + +## We do create three logfiles for every run. To save space we rotate them, this +## defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Archive-Update-in-Progress-${MIRRORNAME}" + +# Timeout for the lockfile, in case we have bash older than v4 (and no /proc) +# LOCKTIMEOUT=${LOCKTIMEOUT:-3600} + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last +## had a successful mirror sync. Make sure that it always ends up in +## project/trace and always shows the full hostname. +## This is *relative* to ${TO} +#TRACE="project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +#RSYNC=rsync + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## limit I/O bandwidth. Value is KBytes per second, unset or 0 means unlimited +#RSYNC_BW="" + +## Default rsync options every rsync invocation sees. 
+#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Options the first pass gets. We do not want the Packages/Source indices +## here, and we also do not want to delete any files yet. +#RSYNC_OPTIONS1="--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*" + +## Options the second pass gets. Now we want the Packages/Source indices too +## and we also want to delete files. We also want to delete files that are +## excluded. +#RSYNC_OPTIONS2="--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded" + +## You may establish the connection via a web proxy by setting the environment +## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note +## that your web proxy's configuration must support proxy connections to port 873. +# RSYNC_PROXY= + + +## The following three options are used in case we want to "callback" the host +## we got pushed from. +#CALLBACKUSER="archvsync" +#CALLBACKHOST="none" +#CALLBACKKEY="none" + + +## Hook scripts can be run at various places during the sync. +## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each other. +## Difference is: Hook3 is called *every* time the second rsync was successful, +## but even if the mirroring needs to re-run thanks to a second push. +## Hook4 is only effective if we are done with mirroring. 
+#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/doc/ftpsync/logrotate.conf b/doc/ftpsync/logrotate.conf new file mode 100644 index 0000000..44f00d4 --- /dev/null +++ b/doc/ftpsync/logrotate.conf @@ -0,0 +1,11 @@ +### -*-conf-*- +### +### Log rotation configuration for mirrors. + +compress +rotate 31 +minsize 4194304 +daily + +/mnt/ftp/mirror-admin/log/*.log { +} diff --git a/doc/ftpsync/pushpdo.conf.sample b/doc/ftpsync/pushpdo.conf.sample new file mode 100644 index 0000000..e9be9e3 --- /dev/null +++ b/doc/ftpsync/pushpdo.conf.sample @@ -0,0 +1,40 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the runmirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Which ssh key to use? +#KEYFILE=.ssh/pushmirror + +## The directory for our logfiles +#LOGDIR="${BASEDIR}/log" + +## Our own logfile +#LOG="${LOGDIR}/${NAME}.log" + +## Our lockfile directory +#LOCKDIR="${BASEDIR}/locks" + +## We do create a logfile for every run. To save space we rotate it, this +## defines how many we keep +#LOGROTATE=14 + +## Our mirrorfile +#MIRRORS="${BASEDIR}/etc/${NAME}.mirror" + +## extra ssh options we might want. *hostwide* +#SSH_OPTS="" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. 
+#MAILTO="root" + +## How long to wait for mirrors to do stage1 if we have multi-stage syncing +#PUSHDELAY=240 + +## which path to push +#MIRRORPATH="/org/packages.debian.org/mirror/" diff --git a/doc/ftpsync/pushpdo.mirror.sample b/doc/ftpsync/pushpdo.mirror.sample new file mode 100644 index 0000000..711d496 --- /dev/null +++ b/doc/ftpsync/pushpdo.mirror.sample @@ -0,0 +1,21 @@ +# Definition of mirror hosts we push. +# One mirror per line, with the following fields defined. +# +# ShortName HostName User SSHProtocol SSHKeyFile +# +# ShortName will be used as a shorthand in logfile outputs and for the logfile +# where every ssh output gets redirected to. +# +# If no SSHKeyFile is given, the default from the config file applies. +# If SSHProtocol is empty, it will default to 2, but if you want to +# define a keyfile you HAVE TO set protocol too! +# +# Examples: +# +# piatti piatti.debian.org archvsync +# One special value is allowed: DELAY +# This word has to be on a line itself, followed by a space and a number. +# nothing else, not even whitespace. It will trigger a pause of $number +# seconds between the two mirrors. If no number is given it defaults to +# 60 seconds. +piatti piatti.debian.org archvsync diff --git a/doc/ftpsync/runmirrors.conf.sample b/doc/ftpsync/runmirrors.conf.sample new file mode 100644 index 0000000..9a77c9e --- /dev/null +++ b/doc/ftpsync/runmirrors.conf.sample @@ -0,0 +1,53 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the runmirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Which ssh key to use? 
+#KEYFILE=.ssh/pushmirror + +## The directory for our logfiles +#LOGDIR="${BASEDIR}/log" + +## Our own logfile +#LOG="${LOGDIR}/${NAME}.log" + +## Our lockfile directory +#LOCKDIR="${BASEDIR}/locks" + +## We do create a logfile for every run. To save space we rotate it, this +## defines how many we keep +#LOGROTATE=14 + +## Our mirrorfile +#MIRRORS="${BASEDIR}/etc/${NAME}.mirror" + +## extra ssh options we might want. *hostwide* +## By default, ignore ssh key change of leafs +#SSH_OPTS="-o StrictHostKeyChecking=no" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## What's our archive name? We will also tell our leafs about it +## This is usually empty, but if we are called as "runmirrors bpo" +## it will default to bpo. This way one runmirrors script can serve +## multiple archives, similar to what ftpsync does. +#PUSHARCHIVE="${CONF}" + +## How long to wait for mirrors to do stage1 if we have multi-stage syncing +#PUSHDELAY=600 + +## Hook scripts can be run at various places. +## Leave them blank/commented out if you don't want any +## Hook1: After reading config, before doing the first real action +## Hook2: Between two hosts to push +## Hook3: When everything is done +#HOOK1="" +#HOOK2="" +#HOOK3="" diff --git a/doc/ftpsync/runmirrors.mirror.sample b/doc/ftpsync/runmirrors.mirror.sample new file mode 100644 index 0000000..744ad98 --- /dev/null +++ b/doc/ftpsync/runmirrors.mirror.sample @@ -0,0 +1,72 @@ +# Definition of mirror hosts we push. +# One mirror per line, with the following fields defined. +# +# Type ShortName HostName User SSHProtocol SSHKeyFile +# +# ALTERNATIVELY the line may look like +# +# Type ShortName HostName User -$SOMESSHOPTION +# +# The fields Type, ShortName, HostName and User are *mandatory*.
+# +# Type is either all, staged or mhop, meaning: +# all - do a "normal" push. Trigger them, go on. +# staged - do a two-stage push, waiting for them after stage1 (and all others that +# are staged) before doing stage2 +# mhop - send a multi-hop staged push. This will tell the mirror to initiate +# a mhop/stage1 push to its staged/mhop mirrors and then exit. +# When all mhop got back we then send the stage2 through to them. +# +# ShortName will be used as a shorthand in logfile outputs and for the logfile +# where every ssh output gets redirected to. +# +# If no SSHKeyFile is given, the default from the config file applies. +# If SSHProtocol is empty, it will default to 2, but if you want to +# define a keyfile you HAVE TO set protocol too! +# +# With the ALTERNATIVE syntax you are able to use any special ssh option +# you want just for one special mirror. The option after the username +# then MUST start with a -, in which case the whole rest of the line is taken +# as a set of options to give to ssh, it is passed through without doing +# anything with it. +# +# There is one caveat here: Should you want to use the -i option to give +# another ssh key to use, keep in mind that the master keyfile will +# always be presented too! That is, ssh will show both keys to the other +# side and the first one presented wins. Which might not be the key you +# want. There is currently no way around this, as that would mean +# dropping backward compatibility. +# +# Backwards compatibility: +# An older runmirrors script will NOT run with a newer runmirrors.mirror file, but +# a new runmirrors can run with an old runmirrors.mirror file. This should make updates +# painless. +# +# Examples: +# all eu.puccini puccini.debian.org archvsync 2 +# +# -> will push puccini.debian.org, user archvsync, using ssh protocol 2 +# and the globally configured ssh key. +# +# all eu.puccini puccini.debian.org archvsync -p 2222 +# +# -> will do the same as above, but use port 2222 to connect to.
+# +# staged eu.puccini puccini.debian.org archvsync +# staged eu.powell powell.debian.org archvsync +# +# -> will push both puccini and powell in stage1, waiting for both to +# finish stage1 before stage2 gets pushed. The username will be archvsync. +# +# staged eu.puccini puccini.debian.org archvsync +# mhop eu.powell powell.debian.org archvsync +# +# -> will do the same as above, but powell gets told about mhop and can then +# push its own staged/mhop mirrors before returning. When both returned +# then stage2 is sent to both. +# +# One special value is allowed: DELAY +# This word has to be on a line itself, followed by a space and a number. +# nothing else, not even whitespace. It will trigger a pause of $number +# seconds between the two mirrors. If no number is given it defaults to +# 600 seconds. diff --git a/doc/ftpsync/websync.conf.sample b/doc/ftpsync/websync.conf.sample new file mode 100644 index 0000000..7a39313 --- /dev/null +++ b/doc/ftpsync/websync.conf.sample @@ -0,0 +1,121 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the ftpsync mirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +#MIRRORNAME=${MIRRORNAME:-`hostname -f`} + +## Destination of the mirrored files. Should be an empty directory. +## CAREFUL, this directory will contain the mirror. Everything else +## that might have happened to be in there WILL BE GONE after the mirror sync! +#TO=${TO:-"/org/www.debian.org/www"} + +## The upstream name of the rsync share. 
+#RSYNC_PATH="web.debian.org" + +## The host we mirror from +#RSYNC_HOST=www-master.debian.org + +## In case we need a user to access the rsync share at our upstream host +#RSYNC_USER= + +## If we need a user we also need a password +#RSYNC_PASSWORD= + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the websync script +#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude statements here. +## See rsync(1) for the exact syntax, these are passed to rsync as written here. +## Please do not use this except for rare cases and after you talked to us. +## For the sponsor logos see SITE_FILTER +#EXCLUDE=${EXCLUDE:-"--exclude ${HOSTNAME}"} + +## And site specific excludes, by default its the sponsor stuff that should be local to all +#SITE_FILTER=${SITE_FILTER:-"--include sponsor.deb.* --exclude sponsor_img.* --exclude sponsor.html --exclude sponsor.*.html --filter=protect_sponsor_img.* --filter=protect_sponsor.html --filter=protect_sponsor.*.html"} + +## Do we have leaf mirror to signal we are done and they should sync? 
+## If so set it to true and make sure you configure runmirrors-websync.mirrors +## and runmirrors-websync.conf for your need. +#HUB=false + +## We do create three logfiles for every run. To save space we rotate them, this +## defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Website-Update-in-Progress-${MIRRORNAME}" + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last +## had a successful mirror sync. Make sure that it always ends up in +## .project/trace and always shows the full hostname. +## This is *relative* to ${TO} +#TRACE=".project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +#RSYNC=rsync + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## Default rsync options every rsync invocation sees. +#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Default rsync options +#RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"} + +## You may establish the connection via a web proxy by setting the environment +## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note +## that your web proxy's configuration must support proxy connections to port 873. +# RSYNC_PROXY= + +## Hook scripts can be run at various places during the sync. 
+## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each other. +## Difference is: Hook3 is called *every* time the second rsync was successful, +## but even if the mirroring needs to re-run thanks to a second push. +## Hook4 is only effective if we are done with mirroring. +#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/etc/common b/etc/common new file mode 100644 index 0000000..55beb9b --- /dev/null +++ b/etc/common @@ -0,0 +1,230 @@ +# -*- mode:sh -*- +# Little common functions + +# push a mirror attached to us. +# Arguments (using an array named SIGNAL_OPTS): +# +# $MIRROR - Name for the mirror, also basename for the logfile +# $HOSTNAME - Hostname to push to +# $USERNAME - Username there +# $SSHPROTO - Protocol version, either 1 or 2. +# $SSHKEY - the ssh private key file to use for this push +# $SSHOPTS - any other option ssh accepts, passed blindly, be careful +# $PUSHLOCKOWN - own lockfile name to touch after stage1 in pushtype=staged +# $PUSHTYPE - what kind of push should be done? +# all - normal, just push once with ssh backgrounded and finish +# staged - staged. first push stage1, then wait for $PUSHLOCKs to appear, +# then push stage2 +# $PUSHARCHIVE - what archive to sync? (Multiple mirrors behind one ssh key!) +# $PUSHCB - do we want a callback? +# $PUSHKIND - what's going on? are we doing mhop push or already stage2? +# $FROMFTPSYNC - set to true if we run from within ftpsync. +# +# This function assumes that the variable LOGDIR is set to a directory where +# logfiles can be written to.
+# Additionally $PUSHLOCKS has to be defined as a set of space delimited strings +# (list of "lock"files) to wait for if you want pushtype=staged +# +# Pushes might be done in background (for type all). +signal () { + ARGS="SIGNAL_OPTS[*]" + local ${!ARGS} + + MIRROR=${MIRROR:-""} + HOSTNAME=${HOSTNAME:-""} + USERNAME=${USERNAME:-""} + SSHPROTO=${SSHPROTO:-""} + SSHKEY=${SSHKEY:-""} + SSHOPTS=${SSHOPTS:-""} + PUSHLOCKOWN=${PUSHLOCKOWN:-""} + PUSHTYPE=${PUSHTYPE:-"all"} + PUSHARCHIVE=${PUSHARCHIVE:-""} + PUSHCB=${PUSHCB:-""} + PUSHKIND=${PUSHKIND:-"all"} + FROMFTPSYNC=${FROMFTPSYNC:-"false"} + + # And now get every # back to space (there may be several options)... + SSHOPTS=${SSHOPTS//\#/ } + + # Defaults we always want, no matter what + SSH_OPTIONS="-o user=${USERNAME} -o BatchMode=yes -o ServerAliveInterval=45 -o ConnectTimeout=45 -o PasswordAuthentication=no" + + # If there are userdefined ssh options, add them. + if [ -n "${SSH_OPTS}" ]; then + SSH_OPTIONS="${SSH_OPTIONS} ${SSH_OPTS}" + fi + + # Does this machine need a special key? + if [ -n "${SSHKEY}" ]; then + SSH_OPTIONS="${SSH_OPTIONS} -i ${SSHKEY}" + fi + + # Does this machine have an extra own set of ssh options? + if [ -n "${SSHOPTS}" ]; then + SSH_OPTIONS="${SSH_OPTIONS} ${SSHOPTS}" + fi + + # Set the protocol version (99 means: add no protocol option at all) + if [ "${SSHPROTO:-0}" -ne 1 ] && [ "${SSHPROTO:-0}" -ne 2 ] && [ "${SSHPROTO:-0}" -ne 99 ]; then + # Idiots, we only want 1 or 2. Cant decide? Lets force 2. + SSHPROTO=2 + fi + if [ -n "${SSHPROTO}" ] && [ "${SSHPROTO}" -ne 99 ]; then + SSH_OPTIONS="${SSH_OPTIONS} -${SSHPROTO}" + fi + + date -u >> "${LOGDIR}/${MIRROR}.log" + + PUSHARGS="" + # PUSHARCHIVE empty or not, we always add the sync:archive: command to transfer. + # Otherwise, if nothing else is added, ssh -f would not work ("no command to execute") + # But ftpsync does treat "sync:archive:" as the main archive, so this works nicely.
+ PUSHARGS="${PUSHARGS} sync:archive:${PUSHARCHIVE}" + + # We have a callback wish, tell downstreams + if [ -n "${PUSHCB}" ]; then + PUSHARGS="${PUSHARGS} sync:callback" + fi + # If we are running an mhop push AND our downstream is one to receive it, tell it. + if [ "xmhopx" = "x${PUSHKIND}x" ] && [ "xmhopx" = "x${PUSHTYPE}x" ]; then + PUSHARGS="${PUSHARGS} sync:mhop" + fi + + if [ "xallx" = "x${PUSHTYPE}x" ]; then + # Default normal "fire and forget" push. We background that, we do not care about the mirrors doings + echo "Sending normal push" >> "${LOGDIR}/${MIRROR}.log" + PUSHARGS1="sync:all" + ssh -f $SSH_OPTIONS "${HOSTNAME}" "${PUSHARGS} ${PUSHARGS1}" >>"${LOGDIR}/${MIRROR}.log" + elif [ "xstagedx" = "x${PUSHTYPE}x" ] || [ "xmhopx" = "x${PUSHTYPE}x" ]; then + # Want a staged push. Fine, lets do that. Not backgrounded. We care about the mirrors doings. + echo "Sending staged push" >> "${LOGDIR}/${MIRROR}.log" + + # Only send stage1 if we havent already send it. When called with stage2, we already did. + if [ "xstage2x" != "x${PUSHKIND}x" ]; then + # Step1: Do a push to only sync stage1, do not background + PUSHARGS1="sync:stage1" + ssh $SSH_OPTIONS "${HOSTNAME}" "${PUSHARGS} ${PUSHARGS1}" >>"${LOGDIR}/${MIRROR}.log" 2>&1 + touch "${PUSHLOCKOWN}" + + # Step2: Wait for all the other "lock"files to appear.
+ tries=0 + # We do not wait forever + while [ ${tries} -lt ${PUSHDELAY} ]; do + total=0 + found=0 + for file in ${PUSHLOCKS}; do + total=$((total + 1)) + if [ -f ${file} ]; then + found=$((found + 1)) + fi + done + if [ ${total} -eq ${found} ] || [ -f "${LOCKDIR}/all_stage1" ]; then + touch "${LOCKDIR}/all_stage1" + break + fi + tries=$((tries + 5)) + sleep 5 + done + # In case we did not have all PUSHLOCKS and still continued, note it + # This is a little racy, especially if the other parts decide to do this + # at the same time, but it wont hurt more than a mail too much, so I don't care much + if [ ${tries} -ge ${PUSHDELAY} ]; then + echo "Failed to wait for all other mirrors. Failed ones are:" >> "${LOGDIR}/${MIRROR}.log" + for file in ${PUSHLOCKS}; do + if [ ! -f ${file} ]; then + echo "${file}" >> "${LOGDIR}/${MIRROR}.log" + error "Missing Pushlockfile ${file} after waiting ${tries} second, continuing" + fi + done + fi + rm -f "${PUSHLOCKOWN}" + fi + + # Step3: It either timed out or we have all the "lock"files, do the rest + # If we are doing mhop AND are called from ftpsync - we now exit. + # That way we notify our uplink that we and all our clients are done with their + # stage1. It can then finish its own, and if all our upstreams downlinks are done, + # it will send us stage2. + # If we are not doing mhop or are not called from ftpsync, we start stage2 + if [ "xtruex" = "x${FROMFTPSYNC}x" ] && [ "xmhopx" = "x${PUSHKIND}x" ]; then + return + else + PUSHARGS2="sync:stage2" + echo "Now doing the second stage push" >> "${LOGDIR}/${MIRROR}.log" + ssh $SSH_OPTIONS "${HOSTNAME}" "${PUSHARGS} ${PUSHARGS2}" >>"${LOGDIR}/${MIRROR}.log" 2>&1 + fi + else + # Can't decide? Then you get nothing.
+ return + fi +} + +# callback, used by ftpsync ($1 = remote user, $2 = host to call, $3 = ssh key file) +callback () { + # Defaults we always want, no matter what + SSH_OPTIONS="-o BatchMode=yes -o ServerAliveInterval=45 -o ConnectTimeout=45 -o PasswordAuthentication=no" + ssh $SSH_OPTIONS -i "$3" -o"user $1" "$2" callback:${HOSTNAME} +} + +# log something (basically echo it together with a timestamp) +# +# Set $PROGRAM to a string to have it added to the output. +log () { + if [ -z "${PROGRAM}" ]; then + echo "$(date +"%b %d %H:%M:%S") $(hostname -s) [$$] $@" + else + echo "$(date +"%b %d %H:%M:%S") $(hostname -s) ${PROGRAM}[$$]: $@" + fi +} + +# log the message using log() but then also send a mail +# to the address configured in MAILTO (if non-empty) +error () { + log "$@" + if [ -n "${MAILTO}" ]; then + echo "$@" | mail -e -s "[$PROGRAM@$(hostname -s)] ERROR [$$]" ${MAILTO} + fi +} + +# run a hook +# needs array variable HOOK setup with HOOKNR being a number and HOOKSCR +# the script to run. +hook () { + ARGS='HOOK[@]' + local "${!ARGS}" + if [ -n "${HOOKSCR}" ]; then + log "Running hook $HOOKNR: ${HOOKSCR}" + set +e + "${HOOKSCR}" + result=$? + set -e + log "Back from hook $HOOKNR, got returncode ${result}" + return $result + else + return 0 + fi +} + +# Return the list of 2-stage mirrors.
+get2stage() { + grep -E '^(staged|mhop)' "${MIRRORS}" | { + while read MTYPE MLNAME MHOSTNAME MUSER MPROTO MKEYFILE; do + PUSHLOCKS="${LOCKDIR}/${MLNAME}.stage1 ${PUSHLOCKS}" + done + echo "$PUSHLOCKS" + } +} + +# Rotate logfiles ($1 = logfile, $2 = generations to keep, default ${LOGROTATE}) +savelog() { + torotate="$1" + count=${2:-${LOGROTATE}} + while [ ${count} -gt 0 ]; do + prev=$(( count - 1 )) + if [ -e "${torotate}.${prev}" ]; then + mv "${torotate}.${prev}" "${torotate}.${count}" + fi + count=$prev + done + mv "${torotate}" "${torotate}.0" +} diff --git a/etc/ftpsync-debian-security.conf b/etc/ftpsync-debian-security.conf new file mode 100644 index 0000000..df490f1 --- /dev/null +++ b/etc/ftpsync-debian-security.conf @@ -0,0 +1,148 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the ftpsync mirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +#MIRRORNAME=`hostname -f` + +## Destination of the mirrored files. Should be an empty directory. +## CAREFUL, this directory will contain the mirror. Everything else +## that might have happened to be in there WILL BE GONE after the mirror sync! +#TO="/org/ftp.debian.org/ftp/" + +## The upstream name of the rsync share.
+#RSYNC_PATH="ftp" + +## The host we mirror from +#RSYNC_HOST=some.mirror.debian.org + +## In case we need a user to access the rsync share at our upstream host +#RSYNC_USER= + +## If we need a user we also need a password +#RSYNC_PASSWORD= + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the ftpsync script depending on the way it +## is called. See README for a description of the multi-archive capability +## and better always include ${NAME} in this path. +#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude statements here. +## See rsync(1) for the exact syntax, these are passed to rsync as written here. +## DO NOT TRY TO EXCLUDE ARCHITECTURES OR SUITES WITH THIS, IT WILL NOT WORK! +#EXCLUDE="" + +## If you do want to exclude an architecture, this is for you. +## Use as space seperated list. +## Possible values are: +## alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, kfreebsd-amd64, +## kfreebsd-i386, m68k, mipsel, mips, powerpc, s390, sh, sparc and source +## eg. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc" +## An unset value will mirror all architectures (default!) 
+#ARCH_EXCLUDE="" + +## Do we have leaf mirror to signal we are done and they should sync? +## If so set it to true and make sure you configure runmirrors.mirrors +## and runmirrors.conf for your need. +#HUB=false + +## We do create three logfiles for every run. To save space we rotate them, this +## defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Archive-Update-in-Progress-${MIRRORNAME}" + +# Timeout for the lockfile, in case we have bash older than v4 (and no /proc) +# LOCKTIMEOUT=${LOCKTIMEOUT:-3600} + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last +## had a successful mirror sync. Make sure that it always ends up in +## project/trace and always shows the full hostname. +## This is *relative* to ${TO} +#TRACE="project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +#RSYNC=rsync + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## limit I/O bandwidth. Value is KBytes per second, unset or 0 means unlimited +#RSYNC_BW="" + +## Default rsync options every rsync invocation sees. +#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Options the first pass gets. We do not want the Packages/Source indices +## here, and we also do not want to delete any files yet. 
+#RSYNC_OPTIONS1="--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*" + +## Options the second pass gets. Now we want the Packages/Source indices too +## and we also want to delete files. We also want to delete files that are +## excluded. +#RSYNC_OPTIONS2="--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded" + +## You may establish the connection via a web proxy by setting the environment +## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note +## that your web proxy's configuration must support proxy connections to port 873. +# RSYNC_PROXY= + + +## The following three options are used in case we want to "callback" the host +## we got pushed from. +#CALLBACKUSER="archvsync" +#CALLBACKHOST="none" +#CALLBACKKEY="none" + + +## Hook scripts can be run at various places during the sync. +## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each other. +## Difference is: Hook3 is called *every* time the second rsync was successful, +## but even if the mirroring needs to re-run thanks to a second push. +## Hook4 is only effective if we are done with mirroring. +#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/etc/ftpsync-debian.conf b/etc/ftpsync-debian.conf new file mode 100644 index 0000000..391e6b7 --- /dev/null +++ b/etc/ftpsync-debian.conf @@ -0,0 +1,184 @@ +### -*-sh-*- +### +### ftpsync configuration for the Debian main archive. + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +#MIRRORNAME=`hostname -f` + +## Destination of the mirrored files. 
Should be an empty directory. +## CAREFUL, this directory will contain the mirror. Everything else that +## might have happened to be in there WILL BE GONE after the mirror sync! +TO="/mnt/ftp/pub/mirrors/debian/" + +## The upstream name of the rsync share. +RSYNC_PATH="debian" + +## The host we mirror from +RSYNC_HOST=ftp.uk.debian.org + +## In case we need a user to access the rsync share at our upstream host +#RSYNC_USER= + +## If we need a user we also need a password +#RSYNC_PASSWORD= + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the ftpsync script depending on the way it +## is called. See README for a description of the multi-archive capability +## and better always include ${NAME} in this path. +#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude +## statements here. See rsync(1) for the exact syntax, these are passed to +## rsync as written here. DO NOT TRY TO EXCLUDE ARCHITECTURES OR SUITES WITH +## THIS, IT WILL NOT WORK! +#EXCLUDE="" + +## If you do want to exclude an architecture, this is for you. +## Use as space separated list. 
+## Possible values are:
+## alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, kfreebsd-amd64,
+## kfreebsd-i386, m68k, mipsel, mips, powerpc, s390, sh, sparc and source
+## e.g. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc"
+## An unset value will mirror all architectures (default!)
+#ARCH_EXCLUDE=""
+
+## Actually, no: we want to /include/ only some architectures.  So we need to
+## find out which ones are available so that we can exclude the ones we don't
+## want.  Crazy, no?
+ARCH_EXCLUDE=$(
+  ## Check the available distributions for architectures.  NOTE(review): if
+  ## every listing fails, the list comes out empty, which presumably means
+  ## that all architectures get mirrored -- confirm that this is acceptable.
+  for dist in oldstable stable testing unstable experimental; do
+    rsync --list-only "$RSYNC_HOST::$RSYNC_PATH/dists/$dist/main/"
+  done | {
+    ## Gather up excluded architectures as we go.
+    excludes=""
+
+    ## Each listing line has the form MODE SIZE DATE TIME NAME.
+    while read -r mode size date time name; do
+      ## Check directories of binary packages.  If it's an architecture we
+      ## don't want to reject, then continue on.
+      case "$name" in
+        (binary-all | binary-i386 | binary-amd64) continue ;;
+        (binary-*) ;;
+        (*) continue ;;
+      esac
+
+      ## Pick out the architecture name.  Check whether we've seen it before.
+      arch=${name#binary-}
+      case " $excludes " in
+        (*" $arch "*)
+          ;;
+        (*)
+          excludes="${excludes:+$excludes }$arch"
+          ;;
+      esac
+    done
+
+    ## Done.  Print out the finished list.
+    echo "$excludes"
+  }
+)
+
+## Do we have leaf mirrors to signal that we are done and they should sync?
+## If so set it to true and make sure you configure runmirrors.mirrors
+## and runmirrors.conf for your needs.
+#HUB=false
+
+## We do create three logfiles for every run.
To save space we rotate them, +## this defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Archive-Update-in-Progress-${MIRRORNAME}" + +## Timeout for the lockfile, in case we have bash older than v4 (and no +## /proc) +# LOCKTIMEOUT=${LOCKTIMEOUT:-3600} + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last had a +## successful mirror sync. Make sure that it always ends up in project/trace +## and always shows the full hostname. This is *relative* to ${TO} +#TRACE="project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +RSYNC="timeout 6h rsync" + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## limit I/O bandwidth. Value is KBytes per second, unset or 0 means +## unlimited +#RSYNC_BW="" + +## Default rsync options every rsync invocation sees. +#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Options the first pass gets. We do not want the Packages/Source indices +## here, and we also do not want to delete any files yet. +#RSYNC_OPTIONS1="--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*" + +## Options the second pass gets. Now we want the Packages/Source indices too +## and we also want to delete files. We also want to delete files that are +## excluded. 
+#RSYNC_OPTIONS2="--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded" + +## You may establish the connection via a web proxy by setting the +## environment variable RSYNC_PROXY to a hostname:port pair pointing to your +## web proxy. Note that your web proxy's configuration must support proxy +## connections to port 873. +# RSYNC_PROXY= + +## The following three options are used in case we want to "callback" the +## host we got pushed from. +#CALLBACKUSER="archvsync" +#CALLBACKHOST="none" +#CALLBACKKEY="none" + +## Hook scripts can be run at various places during the sync. +## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each +## other. Difference is: Hook3 is called *every* time the second rsync was +## successful, but even if the mirroring needs to re-run thanks to a second +## push. Hook4 is only effective if we are done with mirroring. +#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/etc/mirror.packages b/etc/mirror.packages new file mode 100644 index 0000000..a790722 --- /dev/null +++ b/etc/mirror.packages @@ -0,0 +1,27 @@ +### -*-conf-*- +### +### General mirror configuration. + +###-------------------------------------------------------------------------- +package=defaults + + ## Basic login stuff. + remote_user=ftp + remote_password=mirror@distorted.org.uk + passive_ftp=true + timeout=120 + + ## What to fetch. + recursive=true + exclude_patt+|\~$ + + ## Sensible mode settings. + mode_copy=false + file_mode=0664 + dir_mode=0775 + + ## Tidy up files which disappear upstream. 
+ do_deletes=true + max_delete_files=80% + +###----- That's all, folks -------------------------------------------------- diff --git a/lib/functions.sh b/lib/functions.sh new file mode 100644 index 0000000..7c5fbe9 --- /dev/null +++ b/lib/functions.sh @@ -0,0 +1,27 @@ +### -*-sh-*- +### +### Common functions for mirror scripts. + +rsync_timeout=60 + +standard_rsync () { + for i in 1 2 3 4 5; do + set +e + rsync --recursive \ + --verbose \ + --timeout=$rsync_timeout \ + --times --omit-dir-times \ + --links --safe-links \ + --delete-during --delete-excluded \ + --chmod=ug=rw,o=r,Dg+s,Da+x --perms \ + "$@" + rc=$? + set -e + case $rc in + 30) ;; + *) return $rc ;; + esac + echo >&2 "rsync connection timed out: retrying" + done + return 30 +} -- 2.11.0