From: Mark Wooding Date: Mon, 10 Mar 2014 10:48:24 +0000 (+0000) Subject: Merge branch 'ftpsync' X-Git-Url: https://git.distorted.org.uk/~mdw/mirror-admin/commitdiff_plain/a61843da3f2a729a4e1b7dee4c947b2bf1cf3802?hp=2e267ae8659832e97f23b78eeab6eaf793f60e19 Merge branch 'ftpsync' * ftpsync: New ftpsync upstream. Conflicts: bin/ftpsync: `-excludes' -> `--excludes' now upstream --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69e47ea --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +log +var +etc/krb4.keytab diff --git a/.userv/rc b/.userv/rc new file mode 100644 index 0000000..b86f0c0 --- /dev/null +++ b/.userv/rc @@ -0,0 +1,8 @@ +### -*-conf-*- + +if ( glob service run + & glob calling-group ftpadmin + ) + no-suppress-args + execute bin/run-mirrors +fi diff --git a/bin/debian-excludes b/bin/debian-excludes new file mode 100755 index 0000000..11701f2 --- /dev/null +++ b/bin/debian-excludes @@ -0,0 +1,53 @@ +#! /bin/sh + +set -e +case $# in + 0 | 1 | 2 | 3) + echo >&2 "Usage: $0 HOST PATH ARCH:ARCH:... DIST..." + exit 1 + ;; + *) + RSYNC_HOST=$1 RSYNC_PATH=$2 WANT_ARCH=$3; shift 3 + ;; +esac + +## Check the available distributions for architectures. +: ${RSYNC="rsync"} +for dist in "$@"; do + $RSYNC --list-only $RSYNC_HOST::$RSYNC_PATH/dists/$dist/main/ +done | { + + ## Gather up excluded architectures as we go. + excludes="" + + while read mode size date time name; do + + ## Check directories of binary packages. If it's an architecture we + ## don't want to reject, then continue on. + case "$name" in + binary-all) + continue + ;; + binary-*) + arch=${name#binary-} + case ":$WANT_ARCH:" in *:"$arch":*) continue ;; esac + ;; + *) + continue + ;; + esac + + ## Pick out the architecture name. Check whether we've seen it before. + arch=${name#binary-} + case " $excludes " in + *" $arch "*) + ;; + *) + excludes="${excludes+$excludes }$arch" + ;; + esac + done + + ## Done. Print out the finished list. 
+ echo $excludes +} diff --git a/bin/fetch-ftpsync b/bin/fetch-ftpsync new file mode 100755 index 0000000..c1f5e8b --- /dev/null +++ b/bin/fetch-ftpsync @@ -0,0 +1,19 @@ +#! /bin/sh -e + +here=$(pwd) +tmp=$(mktemp -d) +trap 'cd; rm -rf "$tmp"' EXIT INT TERM + +cd "$tmp" +git clone "$here" mirror +cd mirror +git checkout ftpsync + +curl https://ftp-master.debian.org/ftpsync.tar.gz | + tar --strip-components=1 -xvmzf - distrib/bin distrib/etc/common + +git add bin/ etc/common +git commit -m"New ftpsync upstream." +git push + +cd "$here" diff --git a/bin/run-mirrors b/bin/run-mirrors new file mode 100755 index 0000000..9c24c7d --- /dev/null +++ b/bin/run-mirrors @@ -0,0 +1,49 @@ +#! /bin/sh + +set -e + +## Make sure we're running as the right user. +case $(id -un) in + mirror) ;; + *) exec userv -fstdin=/dev/null mirror run ;; +esac + +## Set up a plausible environment. +HOME=/var/lib/mirror-admin; export HOME; cd +MIRRORS=/mnt/mirrors; export MIRRORS +PATH=$HOME/bin:/usr/bin:/usr/sbin:/bin:/sbin:/usr/local/bin:/usr/local/sbin +export PATH +umask 002 + +## Make sure we're running with a lock file. +case "${MIRROR_LOCKED-nil}" in + nil) exec env MIRROR_LOCKED=t locking -f var/mirror.lock "$0" "$@" ;; +esac + +## Hack for mad Kerberized NFS. +if [ -r etc/krb5.keytab ]; then + kinit -k -t etc/krb5.keytab mirror +fi + +## Before we start, rotate the logs. (Doing things this way means that we +## can be sure we don't lose new logs, even if the log rotation goes +## completely mental. +logrotate -s var/logrotate.state etc/logrotate.conf + +## Let SIGINT take out the children only. +trap "" INT + +## Now do the various mirroring things. 
+for file in $(run-parts --list etc/mirrors.d); do + [ -x "$file" ] || continue + base=${file##*/}; base=${base#[0-9]*-} + ( + echo + echo "***--------------------------------------------------" + echo "*** Running $base at $(date +%Y-%m-%dT%H:%M:%S)" + echo + set +e; (trap - INT; exec "$file"); rc=$?; set -e + echo + echo "*** Finished $(date +%Y-%m-%dT%H:%M:%S); rc = $rc" + ) >>log/$base.log 2>&1 +done diff --git a/doc/ftpsync/README b/doc/ftpsync/README new file mode 100644 index 0000000..e85af02 --- /dev/null +++ b/doc/ftpsync/README @@ -0,0 +1,257 @@ +Archvsync +========= + +This is the central repository for the Debian mirror scripts. The scripts +in this repository are written for the purposes of maintaining a Debian +archive mirror (and shortly, a Debian bug mirror), but they should be +easily generalizable. + + +Currently the following scripts are available: + + * ftpsync - Used to sync an archive using rsync + * runmirrors - Used to notify leaf nodes of available updates + * dircombine - Internal script to manage the mirror user's $HOME + on debian.org machines + * typicalsync - Generates a typical Debian mirror + * udh - We are lazy, just a shorthand to avoid typing the + commands, ignore... :) + +Usage +===== +For impatient people, short usage instruction: + + - Create a dedicated user for the whole mirror. + - Create a separate directory for the mirror, writeable by the new user. + - Place the ftpsync script in the mirror user's $HOME/bin (or just $HOME) + - Place the ftpsync.conf.sample into $HOME/etc as ftpsync.conf and edit + it to suit your system. You should at the very least change the TO= + and RSYNC_HOST lines. + - Create $HOME/log (or wherever you point $LOGDIR to) + - Setup the .ssh/authorized_keys for the mirror user and place the public key of + your upstream mirror into it. 
Preface it with +no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="~/bin/ftpsync",from="IPADDRESS" + and replace $IPADDRESS with that of your upstream mirror. + - You are finished + +In order to receive different pushes or syncs from different archives, +name the config file ftpsync-$ARCHIVE.conf and call the ftpsync script +with the commandline "sync:archive:$ARCHIVE". Replace $ARCHIVE with a +sensible value. If your upstream mirror pushes you using runmirrors +bundled together with this sync script, you do not need to add the +"sync:archive" parameter to the commandline, the scripts deal with it +automatically. + + + +Debian mirror script minimum requirements +========================================= +As always, you may use whatever scripts you want for your Debian mirror, +but we *STRONGLY* recommend you to not invent your own. However, if you +want to be listed as a mirror it *MUST* support the following minimal +functionality: + + - Must perform a 2-stage sync + The archive mirroring must be done in 2 stages. The first rsync run + must ignore the index files. The correct exclude options for the + first rsync run are: + --exclude Packages* --exclude Sources* --exclude Release* --exclude ls-lR* + The first stage must not delete any files. + + The second stage should then transfer the above excluded files and + delete files that no longer belong on the mirror. + + Rationale: If archive mirroring is done in a single stage, there will be + periods of time during which the index files will reference files not + yet mirrored. + + - Must not ignore pushes whil(e|st) running. + If a push is received during a run of the mirror sync, it MUST NOT + be ignored. The whole synchronization process must be rerun. + + Rationale: Most implementations of Debian mirror scripts will leave the + mirror in an inconsistent state in the event of a second push being + received while the first sync is still running. 
It is likely that in + the near future, the frequency of pushes will increase. + + - Should understand multi-stage pushes. + The script should parse the arguments it gets via ssh, and if they + contain a hint to only sync stage1 or stage2, then ONLY those steps + SHOULD be performed. + + Rationale: This enables us to coordinate the timing of the first + and second stage pushes and minimize the time during which the + archive is desynchronized. This is especially important for mirrors + that are involved in a round robin or GeoDNS setup. + + The minimum arguments the script has to understand are: + sync:stage1 Only sync stage1 + sync:stage2 Only sync stage2 + sync:all Do everything. Default if none of stage1/2 are + present. + There are more possible arguments, for a complete list see the + ftpsync script in our git repository. + + + +ftpsync +======= + +This script is based on the old anonftpsync script. It has been rewritten +to add flexibility and fix a number of outstanding issues. + +Some of the advantages of the new version are: + - Nearly every aspect is configurable + - Correct support for multiple pushes + - Support for multi-stage archive synchronisations + - Support for hook scripts at various points + - Support for multiple archives, even if they are pushed using one ssh key + - Support for multi-hop, multi-stage archive synchronisations + + Correct support for multiple pushes + ----------------------------------- + When the script receives a second push while it is running and syncing + the archive it won't ignore it. Instead it will rerun the + synchronisation step to ensure the archive is correctly synchronised. + + Scripts that fail to do that risk ending up with an inconsistent archive. + + + Can do multi-stage archive synchronisations + ------------------------------------------- + The script can be told to only perform the first or second stage of the + archive synchronisation. 
+ + This enables us to send all the binary packages and sources to a + number of mirrors, and then tell all of them to sync the + Packages/Release files at once. This will keep the timeframe in which + the mirrors are out of sync very small and will greatly help things like + DNS RR entries or even the planned GeoDNS setup. + + + Multi-hop, multi-stage archive synchronisations + ----------------------------------------------- + The script can be told to perform a multi-hop multi-stage archive + synchronisation. + + This is basically the same as the multi-stage synchronisation + explained above, but enables the downstream mirror to push its own + staged/multi-hop downstreams before returning. This has the same + advantage as the multi-stage synchronisation but allows us to do + this over multiple levels of mirrors. (Imagine one push going from + Europe to Australia, where then locally 3 others get updated before + stage2 is sent out. Instead of transferring data 4 times from Europe to + Australia, just to have them all updated near instantly). + + + Can run hook scripts + -------------------- + ftpsync currently allows 5 hook scripts to run at various points of the + mirror sync run. + + Hook1: After lock is acquired, before first rsync + Hook2: After first rsync, if successful + Hook3: After second rsync, if successful + Hook4: Right before leaf mirror triggering + Hook5: After leaf mirror trigger (only if we have slave mirrors; HUB=true) + + Note that Hook3 and Hook4 are likely to be called directly after each other. + The difference is that Hook3 is called *every* time the second rsync + succeeds even if the mirroring needs to re-run due to a second push. + Hook4 is only executed if mirroring is completed. 
+ + + Support for multiple archives, even if they are pushed using one ssh key + ------------------------------------------------------------------------ + If you get multiple archives from your upstream mirror (say Debian, + Debian-Backports and Volatile), previously you had to use 3 different ssh + keys to be able to automagically synchronize them. This script can do it + all with just one key, if your upstream mirror tells you which archive. + See "Commandline/SSH options" below for further details. + + +For details of all available options, please see the extensive documentation +in the sample configuration file. + + +Commandline/SSH options +======================= +Script options may be set either on the local command line, or passed by +specifying an ssh "command". Local commandline options always have +precedence over the SSH_ORIGINAL_COMMAND ones. + +Currently this script understands the options listed below. To make them +take effect they MUST be prepended by "sync:". + +Option Behaviour +stage1 Only do stage1 sync +stage2 Only do stage2 sync +all Do a complete sync (default) +mhop Do a multi-hop sync +archive:foo Sync archive foo (if the file $HOME/etc/ftpsync-foo.conf + exists and is configured) +callback Call back when done (needs proper ssh setup for this to + work). It will always use the "command" callback:$HOSTNAME + where $HOSTNAME is the one defined in config and + will happen before slave mirrors are triggered. + +So, to get the script to sync all of the archive behind bpo and call back when +it is complete, use an upstream trigger of +ssh $USER@$HOST sync:all sync:archive:bpo sync:callback + + +Mirror trace files +================== +Every mirror needs to have a 'trace' file under project/trace. +The file format is as follows: + + The filename has to be the full hostname (eg. hostname -f), or in the + case of a mirror participating in RR DNS (where users will never use + the hostname) the name of the DNS RR entry, eg. 
security.debian.org + for the security rotation + + The content has (no leading spaces): + Sat Nov 8 13:20:22 UTC 2008 + Used ftpsync version: 42 + Running on host: steffani.debian.org + + First line: Output of date -u + Second line: Freeform text containing the program name and version + Third line: Text "Running on host: " followed by hostname -f + + The third line MUST NOT be the DNS RR name, even if the mirror is part + of it. It MUST BE the host's own name. This is in contrast to the filename, + which SHOULD be the DNS RR name. + + +runmirrors +========== +This script is used to tell leaf mirrors that it is time to synchronize +their copy of the archive. This is done by parsing a mirror list and +using ssh to "push" the leaf nodes. You can read much more about the +principle behind the push at [1], essentially it tells the receiving +end to run a pre-defined script. As the whole setup is extremely limited +and the ssh key is not usable for anything other than the pre-defined +script this is the most secure method for such an action. + +This script supports two types of pushes: The normal single stage push, +as well as the newer multi-stage push. + +The normal push, as described above, will simply push the leaf node and +then go on with the other nodes. + +The multi-staged push first pushes a mirror and tells it to only do a +stage1 sync run. Then it waits for the mirror (and all others being pushed +in the same run) to finish that run, before it tells all of the staged +mirrors to do the stage2 sync. + +This way you can do a nearly-simultaneous update of multiple hosts. +This is useful in situations where periods of desynchronization should +be kept as small as possible. Examples of scenarios where this might be +useful include multiple hosts in a DNS Round Robin entry. + +For details on the mirror list please see the documented +runmirrors.mirror.sample file. 
+ + +[1] http://blog.ganneff.de/blog/2007/12/29/ssh-triggers.html diff --git a/doc/ftpsync/ftpsync.conf.sample b/doc/ftpsync/ftpsync.conf.sample new file mode 100644 index 0000000..df490f1 --- /dev/null +++ b/doc/ftpsync/ftpsync.conf.sample @@ -0,0 +1,148 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the ftpsync mirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +#MIRRORNAME=`hostname -f` + +## Destination of the mirrored files. Should be an empty directory. +## CAREFUL, this directory will contain the mirror. Everything else +## that might have happened to be in there WILL BE GONE after the mirror sync! +#TO="/org/ftp.debian.org/ftp/" + +## The upstream name of the rsync share. +#RSYNC_PATH="ftp" + +## The host we mirror from +#RSYNC_HOST=some.mirror.debian.org + +## In case we need a user to access the rsync share at our upstream host +#RSYNC_USER= + +## If we need a user we also need a password +#RSYNC_PASSWORD= + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the ftpsync script depending on the way it +## is called. See README for a description of the multi-archive capability +## and better always include ${NAME} in this path. +#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. 
+## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude statements here. +## See rsync(1) for the exact syntax, these are passed to rsync as written here. +## DO NOT TRY TO EXCLUDE ARCHITECTURES OR SUITES WITH THIS, IT WILL NOT WORK! +#EXCLUDE="" + +## If you do want to exclude an architecture, this is for you. +## Use a space-separated list. +## Possible values are: +## alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, kfreebsd-amd64, +## kfreebsd-i386, m68k, mipsel, mips, powerpc, s390, sh, sparc and source +## eg. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc" +## An unset value will mirror all architectures (default!) +#ARCH_EXCLUDE="" + +## Do we have leaf mirror to signal we are done and they should sync? +## If so set it to true and make sure you configure runmirrors.mirrors +## and runmirrors.conf for your need. +#HUB=false + +## We do create three logfiles for every run. 
To save space we rotate them, this +## defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Archive-Update-in-Progress-${MIRRORNAME}" + +# Timeout for the lockfile, in case we have bash older than v4 (and no /proc) +# LOCKTIMEOUT=${LOCKTIMEOUT:-3600} + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last +## had a successful mirror sync. Make sure that it always ends up in +## project/trace and always shows the full hostname. +## This is *relative* to ${TO} +#TRACE="project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +#RSYNC=rsync + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## limit I/O bandwidth. Value is KBytes per second, unset or 0 means unlimited +#RSYNC_BW="" + +## Default rsync options every rsync invocation sees. +#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Options the first pass gets. We do not want the Packages/Source indices +## here, and we also do not want to delete any files yet. +#RSYNC_OPTIONS1="--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*" + +## Options the second pass gets. Now we want the Packages/Source indices too +## and we also want to delete files. We also want to delete files that are +## excluded. 
+#RSYNC_OPTIONS2="--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded" + +## You may establish the connection via a web proxy by setting the environment +## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note +## that your web proxy's configuration must support proxy connections to port 873. +# RSYNC_PROXY= + + +## The following three options are used in case we want to "callback" the host +## we got pushed from. +#CALLBACKUSER="archvsync" +#CALLBACKHOST="none" +#CALLBACKKEY="none" + + +## Hook scripts can be run at various places during the sync. +## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each other. +## Difference is: Hook3 is called *every* time the second rsync was successful, +## but even if the mirroring needs to re-run thanks to a second push. +## Hook4 is only effective if we are done with mirroring. +#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/doc/ftpsync/logrotate.conf b/doc/ftpsync/logrotate.conf new file mode 100644 index 0000000..44f00d4 --- /dev/null +++ b/doc/ftpsync/logrotate.conf @@ -0,0 +1,11 @@ +### -*-conf-*- +### +### Log rotation configuration for mirrors. + +compress +rotate 31 +minsize 4194304 +daily + +/mnt/ftp/mirror-admin/log/*.log { +} diff --git a/doc/ftpsync/pushpdo.conf.sample b/doc/ftpsync/pushpdo.conf.sample new file mode 100644 index 0000000..e9be9e3 --- /dev/null +++ b/doc/ftpsync/pushpdo.conf.sample @@ -0,0 +1,40 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the runmirror script. 
## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Which ssh key to use? +#KEYFILE=.ssh/pushmirror + +## The directory for our logfiles +#LOGDIR="${BASEDIR}/log" + +## Our own logfile +#LOG="${LOGDIR}/${NAME}.log" + +## Our lockfile directory +#LOCKDIR="${BASEDIR}/locks" + +## We do create a logfile for every run. To save space we rotate it, this +## defines how many we keep +#LOGROTATE=14 + +## Our mirrorfile +#MIRRORS="${BASEDIR}/etc/${NAME}.mirror" + +## extra ssh options we might want. *hostwide* +#SSH_OPTS="" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## How long to wait for mirrors to do stage1 if we have multi-stage syncing +#PUSHDELAY=240 + +## which path to push +#MIRRORPATH="/org/packages.debian.org/mirror/" diff --git a/doc/ftpsync/pushpdo.mirror.sample b/doc/ftpsync/pushpdo.mirror.sample new file mode 100644 index 0000000..711d496 --- /dev/null +++ b/doc/ftpsync/pushpdo.mirror.sample @@ -0,0 +1,21 @@ +# Definition of mirror hosts we push. +# One mirror per line, with the following fields defined. +# +# ShortName HostName User SSHProtocol SSHKeyFile +# +# ShortName will be used as a shorthand in logfile outputs and for the logfile +# where every ssh output gets redirected to. +# +# If no SSHKeyFile is given, the default from the config file applies. +# If SSHProtocol is empty, it will default to 2, but if you want to +# define a keyfile you HAVE TO set protocol too! +# +# Examples: +# +# piatti piatti.debian.org archvsync +# One special value is allowed: DELAY +# This word has to be on a line itself, followed by a space and a number. 
+# nothing else, not even whitespace. It will trigger a pause of $number +# seconds between the two mirrors. If no number is given it defaults to +# 60 seconds. +piatti piatti.debian.org archvsync diff --git a/doc/ftpsync/runmirrors.conf.sample b/doc/ftpsync/runmirrors.conf.sample new file mode 100644 index 0000000..9a77c9e --- /dev/null +++ b/doc/ftpsync/runmirrors.conf.sample @@ -0,0 +1,53 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the runmirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Which ssh key to use? +#KEYFILE=.ssh/pushmirror + +## The directory for our logfiles +#LOGDIR="${BASEDIR}/log" + +## Our own logfile +#LOG="${LOGDIR}/${NAME}.log" + +## Our lockfile directory +#LOCKDIR="${BASEDIR}/locks" + +## We do create a logfile for every run. To save space we rotate it, this +## defines how many we keep +#LOGROTATE=14 + +## Our mirrorfile +#MIRRORS="${BASEDIR}/etc/${NAME}.mirror" + +## extra ssh options we might want. *hostwide* +## By default, ignore ssh key change of leafs +#SSH_OPTS="-o StrictHostKeyChecking=no" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## Whats our archive name? We will also tell our leafs about it +## This is usually empty, but if we are called as "runmirrors bpo" +## it will default to bpo. This way one runmirrors script can serve +## multiple archives, similar to what ftpsync does. 
+#PUSHARCHIVE="${CONF}" + +## How long to wait for mirrors to do stage1 if we have multi-stage syncing +#PUSHDELAY=600 + +## Hook scripts can be run at various places. +## Leave them blank/commented out if you don't want any +## Hook1: After reading config, before doing the first real action +## Hook2: Between two hosts to push +## Hook3: When everything is done +#HOOK1="" +#HOOK2="" +#HOOK3="" diff --git a/doc/ftpsync/runmirrors.mirror.sample b/doc/ftpsync/runmirrors.mirror.sample new file mode 100644 index 0000000..744ad98 --- /dev/null +++ b/doc/ftpsync/runmirrors.mirror.sample @@ -0,0 +1,72 @@ +# Definition of mirror hosts we push. +# One mirror per line, with the following fields defined. +# +# Type ShortName HostName User SSHProtocol SSHKeyFile +# +# ALTERNATIVELY the line may look like +# +# Type ShortName HostName User -$SOMESSHOPTION +# +# The fields Type, ShortName, HostName and User are *mandatory*. +# +# Type is either all, staged or mhop, meaning: +# all - do a "normal" push. Trigger them, go on. +# staged - do a two-stage push, waiting for them after stage1 (and all others that +# are staged) before doing stage2 +# mhop - send a multi-hop staged push. This will tell the mirror to initiate +# a mhop/stage1 push to its staged/mhop mirrors and then exit. +# When all mhop got back we then send the stage2 through to them. +# +# ShortName will be used as a shorthand in logfile outputs and for the logfile +# where every ssh output gets redirected to. +# +# If no SSHKeyFile is given, the default from the config file applies. +# If SSHProtocol is empty, it will default to 2, but if you want to +# define a keyfile you HAVE TO set protocol too! +# +# With the ALTERNATIVE syntax you are able to use any special ssh option +# you want just for one special mirror. The option after the username +# then MUST start with a -, in which case the whole rest of the line is taken +# as a set of options to give to ssh, it is passed through without doing +# anything with it. 
+# +# There is one caveat here: Should you want to use the -i option to give +# another ssh key to use, keep in mind that the master keyfile will +# always be presented too! That is, ssh will show both keys to the other +# side and the first one presented wins. Which might not be the key you +# want. There is currently no way around this, as that would mean +# dropping backward compatibility. +# +# Backwards compatibility: +# An older runmirrors script will NOT run with a newer runmirrors.mirror file, but +# a new runmirrors can run with an old runmirrors.mirror file. This should make updates +# painless. +# +# Examples: +# all eu.puccini puccini.debian.org archvsync 2 +# +# -> will push puccini.debian.org, user archvsync, using ssh protocol 2 +# and the globally configured ssh key. +# +# all eu.puccini puccini.debian.org archvsync -p 2222 +# +# -> will do the same as above, but use port 2222 to connect to. +# +# staged eu.puccini puccini.debian.org archvsync +# staged eu.powell powell.debian.org archvsync +# +# -> will push both puccini and powell in stage1, waiting for both to +# finish stage1 before stage2 gets pushed. The username will be archvsync. +# +# staged eu.puccini puccini.debian.org archvsync +# mhop eu.powell powell.debian.org archvsync +# +# -> will do the same as above, but powell gets told about mhop and can then +# push its own staged/mhop mirrors before returning. When both returned +# then stage2 is sent to both. +# +# One special value is allowed: DELAY +# This word has to be on a line itself, followed by a space and a number. +# nothing else, not even whitespace. It will trigger a pause of $number +# seconds between the two mirrors. If no number is given it defaults to +# 600 seconds. 
diff --git a/doc/ftpsync/websync.conf.sample b/doc/ftpsync/websync.conf.sample new file mode 100644 index 0000000..7a39313 --- /dev/null +++ b/doc/ftpsync/websync.conf.sample @@ -0,0 +1,121 @@ +######################################################################## +######################################################################## +## This is a sample configuration file for the ftpsync mirror script. ## +## Most of the values are commented out and just shown here for ## +## completeness, together with their default value. ## +######################################################################## +######################################################################## + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +#MIRRORNAME=${MIRRORNAME:-`hostname -f`} + +## Destination of the mirrored files. Should be an empty directory. +## CAREFUL, this directory will contain the mirror. Everything else +## that might have happened to be in there WILL BE GONE after the mirror sync! +#TO=${TO:-"/org/www.debian.org/www"} + +## The upstream name of the rsync share. +#RSYNC_PATH="web.debian.org" + +## The host we mirror from +#RSYNC_HOST=www-master.debian.org + +## In case we need a user to access the rsync share at our upstream host +#RSYNC_USER= + +## If we need a user we also need a password +#RSYNC_PASSWORD= + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the websync script +#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. 
+#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude statements here. +## See rsync(1) for the exact syntax, these are passed to rsync as written here. +## Please do not use this except for rare cases and after you talked to us. +## For the sponsor logos see SITE_FILTER +#EXCLUDE=${EXCLUDE:-"--exclude ${HOSTNAME}"} + +## And site specific excludes, by default its the sponsor stuff that should be local to all +#SITE_FILTER=${SITE_FILTER:-"--include sponsor.deb.* --exclude sponsor_img.* --exclude sponsor.html --exclude sponsor.*.html --filter=protect_sponsor_img.* --filter=protect_sponsor.html --filter=protect_sponsor.*.html"} + +## Do we have leaf mirror to signal we are done and they should sync? +## If so set it to true and make sure you configure runmirrors-websync.mirrors +## and runmirrors-websync.conf for your need. +#HUB=false + +## We do create three logfiles for every run. To save space we rotate them, this +## defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Website-Update-in-Progress-${MIRRORNAME}" + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last +## had a successful mirror sync. Make sure that it always ends up in +## .project/trace and always shows the full hostname. 
+## This is *relative* to ${TO} +#TRACE=".project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +#RSYNC=rsync + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## Default rsync options every rsync invocation sees. +#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Default rsync options +#RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"} + +## You may establish the connection via a web proxy by setting the environment +## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note +## that your web proxy's configuration must support proxy connections to port 873. +# RSYNC_PROXY= + +## Hook scripts can be run at various places during the sync. +## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each other. +## Difference is: Hook3 is called *every* time the second rsync was successful, +## but even if the mirroring needs to re-run thanks to a second push. +## Hook4 is only effective if we are done with mirroring. 
+#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/etc/ftpsync-common.conf b/etc/ftpsync-common.conf new file mode 100644 index 0000000..5af48ee --- /dev/null +++ b/etc/ftpsync-common.conf @@ -0,0 +1,168 @@ +### -*-sh-*- +### +### Common ftpsync configuration for Debian mirrors. +### +### This file is sourced by configuration scripts for specific mirrors. + +###-------------------------------------------------------------------------- +### Settings expected from the calling script. + +## Mirrorname. This is used for things like the trace file and should always +## be the full hostname of the mirror. +: ${MIRRORNAME=`hostname -f`} + +## Destination of the mirrored files. Should be an empty directory. +## CAREFUL, this directory will contain the mirror. Everything else that +## might have happened to be in there WILL BE GONE after the mirror sync! +: ${LABEL="debian"} +: ${TO="$MIRRORS/$LABEL/"} + +## The upstream name of the rsync share. +: ${RSYNC_PATH="$LABEL"} + +## The host we mirror from +: ${RSYNC_HOST=ftp.de.debian.org} + +## In case we need a user to access the rsync share at our upstream host +: ${RSYNC_USER=} + +## If we need a user we also need a password +: ${RSYNC_PASSWORD=} + +## Architectures we want to carry. +: ${WANT_ARCH="i386:amd64"} + +## Distributions to inspect for available architectures. +: ${SEARCH_DISTS="oldstable stable testing unstable experimental"} + +###-------------------------------------------------------------------------- +### Common settings. + +## In which directory should logfiles end up +## Note that BASEDIR defaults to $HOME, but can be set before calling the +## ftpsync script to any value you want (for example using pam_env) +#LOGDIR="${BASEDIR}/log" + +## Name of our own logfile. +## Note that ${NAME} is set by the ftpsync script depending on the way it +## is called. See README for a description of the multi-archive capability +## and better always include ${NAME} in this path. 
+#LOG="${LOGDIR}/${NAME}.log" + +## The script can send logs (or error messages) to a mail address. +## If this is unset it will default to the local root user unless it is run +## on a .debian.org machine where it will default to the mirroradm people. +#MAILTO="root" + +## If you do want a mail about every single sync, set this to false +## Everything else will only send mails if a mirror sync fails +#ERRORSONLY="true" + +## If you want the logs to also include output of rsync, set this to true. +## Careful, the logs can get pretty big, especially if it is the first mirror +## run +#FULLLOGS="false" + +## If you do want to exclude files from the mirror run, put --exclude +## statements here. See rsync(1) for the exact syntax, these are passed to +## rsync as written here. DO NOT TRY TO EXCLUDE ARCHITECTURES OR SUITES WITH +## THIS, IT WILL NOT WORK! +#EXCLUDE="" + +## If you do want to exclude an architecture, this is for you. +## Use as a space-separated list. +## Possible values are: +## alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, kfreebsd-amd64, +## kfreebsd-i386, m68k, mipsel, mips, powerpc, s390, sh, sparc and source +## eg. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc" +## An unset value will mirror all architectures (default!) +#ARCH_EXCLUDE="" + +## Actually, no: we want to /include/ only some architectures. So we need to +## find out which ones are available so that we can exclude the ones we don't +## want. Crazy, no? +ARCH_EXCLUDE=$( + bin/debian-excludes $RSYNC_HOST $RSYNC_PATH $WANT_ARCH $SEARCH_DISTS + echo source +) + +## Do we have leaf mirrors to signal we are done and they should sync? +## If so set it to true and make sure you configure runmirrors.mirrors +## and runmirrors.conf for your need. +#HUB=false + +## We do create three logfiles for every run. 
To save space we rotate them, +## this defines how many we keep +#LOGROTATE=14 + +## Our own lockfile (only one sync should run at any time) +#LOCK="${TO}/Archive-Update-in-Progress-${MIRRORNAME}" + +## Timeout for the lockfile, in case we have bash older than v4 (and no +## /proc) +# LOCKTIMEOUT=${LOCKTIMEOUT:-3600} + +## The following file is used to make sure we will end up with a correctly +## synced mirror even if we get multiple pushes in a short timeframe +#UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" + +## The trace file is used by a mirror check tool to see when we last had a +## successful mirror sync. Make sure that it always ends up in project/trace +## and always shows the full hostname. This is *relative* to ${TO} +#TRACE="project/trace/${MIRRORNAME}" + +## We sync our mirror using rsync (everything else would be insane), so +## we need a few options set. +## The rsync program +: ${RSYNC="rsync"} + +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! +## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T! + +## limit I/O bandwidth. Value is KBytes per second, unset or 0 means +## unlimited +#RSYNC_BW="" + +## Default rsync options every rsync invocation sees. +#RSYNC_OPTIONS="-prltvHSB8192 --timeout 3600 --stats --exclude Archive-Update-in-Progress-${MIRRORNAME} --exclude ${TRACE} --exclude Archive-Update-Required-${MIRRORNAME}" + +## Options the first pass gets. We do not want the Packages/Source indices +## here, and we also do not want to delete any files yet. +#RSYNC_OPTIONS1="--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*" + +## Options the second pass gets. Now we want the Packages/Source indices too +## and we also want to delete files. We also want to delete files that are +## excluded. 
+#RSYNC_OPTIONS2="--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded" + +## You may establish the connection via a web proxy by setting the +## environment variable RSYNC_PROXY to a hostname:port pair pointing to your +## web proxy. Note that your web proxy's configuration must support proxy +## connections to port 873. +# RSYNC_PROXY= + +## The following three options are used in case we want to "callback" the +## host we got pushed from. +#CALLBACKUSER="archvsync" +#CALLBACKHOST="none" +#CALLBACKKEY="none" + +## Hook scripts can be run at various places during the sync. +## Leave them blank if you don't want any +## Hook1: After lock is acquired, before first rsync +## Hook2: After first rsync, if successful +## Hook3: After second rsync, if successful +## Hook4: Right before leaf mirror triggering +## Hook5: After leaf mirror trigger, only if we have slave mirrors (HUB=true) +## +## Note that Hook3 and Hook4 are likely to be called directly after each +## other. Difference is: Hook3 is called *every* time the second rsync was +## successful, but even if the mirroring needs to re-run thanks to a second +## push. Hook4 is only effective if we are done with mirroring. +#HOOK1= +#HOOK2= +#HOOK3= +#HOOK4= +#HOOK5= diff --git a/etc/ftpsync-debian-multimedia.conf b/etc/ftpsync-debian-multimedia.conf new file mode 100644 index 0000000..5abb9eb --- /dev/null +++ b/etc/ftpsync-debian-multimedia.conf @@ -0,0 +1,10 @@ +### -*-sh-*- +### +### ftpsync configuration for the Debian multimedia archive. + +LABEL=debian-multimedia +RSYNC_HOST=debian-multimedia.org +RSYNC_PATH=debian +SEARCH_DISTS="oldstable stable testing unstable experimental" + +. etc/ftpsync-common.conf diff --git a/etc/ftpsync-debian-security.conf b/etc/ftpsync-debian-security.conf new file mode 100644 index 0000000..17aaf8d --- /dev/null +++ b/etc/ftpsync-debian-security.conf @@ -0,0 +1,13 @@ +### -*-sh-*- +### +### ftpsync configuration for the Debian security archive. 
+ +LABEL=debian-security +RSYNC_HOST=security.debian.org +SEARCH_DISTS=$( + for i in oldstable stable testing; do + echo $i/updates + done +) + +. etc/ftpsync-common.conf diff --git a/etc/ftpsync-debian.conf b/etc/ftpsync-debian.conf new file mode 100644 index 0000000..3417c94 --- /dev/null +++ b/etc/ftpsync-debian.conf @@ -0,0 +1,9 @@ +### -*-sh-*- +### +### ftpsync configuration for the Debian main archive. + +LABEL=debian +RSYNC_HOST=ftp.de.debian.org +SEARCH_DISTS="oldstable stable testing unstable experimental" + +. etc/ftpsync-common.conf diff --git a/etc/ftpsync-ubuntu.conf b/etc/ftpsync-ubuntu.conf new file mode 100644 index 0000000..fac95f1 --- /dev/null +++ b/etc/ftpsync-ubuntu.conf @@ -0,0 +1,9 @@ +### -*-sh-*- +### +### ftpsync configuration for the Ubuntu main archive. + +LABEL=ubuntu +RSYNC_HOST=archive.ubuntu.com +SEARCH_DISTS="karmic lucid maverick natty oneiric" + +. etc/ftpsync-common.conf diff --git a/etc/logrotate.conf b/etc/logrotate.conf new file mode 100644 index 0000000..601aeb5 --- /dev/null +++ b/etc/logrotate.conf @@ -0,0 +1,11 @@ +### -*-conf-*- +### +### Log rotation configuration for mirrors. + +/var/lib/mirror-admin/log/*.log { + compress + rotate 8 + notifempty + weekly + create 0644 +} diff --git a/etc/mirror.packages b/etc/mirror.packages new file mode 100644 index 0000000..a790722 --- /dev/null +++ b/etc/mirror.packages @@ -0,0 +1,27 @@ +### -*-conf-*- +### +### General mirror configuration. + +###-------------------------------------------------------------------------- +package=defaults + + ## Basic login stuff. + remote_user=ftp + remote_password=mirror@distorted.org.uk + passive_ftp=true + timeout=120 + + ## What to fetch. + recursive=true + exclude_patt+|\~$ + + ## Sensible mode settings. + mode_copy=false + file_mode=0664 + dir_mode=0775 + + ## Tidy up files which disappear upstream. 
### Patch context carried on this line (tail of etc/mirror.packages):
###   do_deletes=true
###   max_delete_files=80%
###   ###----- That's all, folks --------------------------------------------------
### Patch context: lib/functions.sh (new file mode 100644,
### index 0000000..6f6a84f, hunk @@ -0,0 +1,27 @@).

### -*-sh-*-
###
### Common functions for mirror scripts.

## Value handed to rsync's `--timeout' option by `standard_rsync'.
rsync_timeout=60

## standard_rsync ARGS...
##
## Run rsync with the standard mirroring options, followed by ARGS (extra
## options, source and destination) exactly as given.  If rsync exits with
## status 30 -- which the message below treats as a connection timeout --
## the whole transfer is attempted again, up to five attempts in total.
##
## Returns 0 on success, the first exit status other than 30 as soon as it
## occurs, or 30 if all five attempts timed out.  Progress about retries is
## written to stderr.
##
## This is plain sh, without `local': the globals `i' and `rc' are
## overwritten on every call.
standard_rsync () {
  for i in 1 2 3 4 5; do

    ## Collect the exit status with `||' rather than bracketing the call
    ## with `set +e' ... `set -e'.  A failing rsync still can't kill a
    ## caller running under `set -e', and the caller's errexit setting is
    ## left exactly as we found it instead of being forced on.
    rc=0
    rsync --recursive \
      --verbose \
      --timeout="$rsync_timeout" \
      --times --omit-dir-times \
      --links --safe-links \
      --delete-delay --delete-excluded --delay-updates \
      --chmod=ug=rw,o=r,Dg+s,Da+x --perms \
      "$@" || rc=$?

    ## Only status 30 is worth another go; anything else (including
    ## success) is the final answer.
    case $rc in
      30) ;;
      *) return $rc ;;
    esac

    ## Don't promise a retry which isn't going to happen.
    case $i in
      5) echo >&2 "rsync connection timed out: giving up" ;;
      *) echo >&2 "rsync connection timed out: retrying" ;;
    esac
  done
  return 30
}