Initial commit.
--- /dev/null
+#! /bin/sh
+###
+### Run a collection of everyday health checks.
+
+set -e
+
+for check in check.d/[!#]*[!~]; do
+ { { { set +e; "$check"; rc=$?; set -e
+ case $rc in 0) ;; *) echo >&2 "failed (rc = $rc)" ;; esac
+ } >&3; } 2>&1 |
+ sed 's/^/E: /'
+ } 3>&1 | {
+ if read line; then
+ echo "## ${check##*/}"
+ echo "$line"
+ cat
+ echo
+ fi
+ }
+done
--- /dev/null
+#! /bin/bash
+###
+### Check the health of attached physical disks.
+
+set -e
+
+## Build a list of actual disk devices according to their buses.
+disks=
+for p in /sys/class/block/*; do
+ bus=none devtype=none idtype=none
+ while read code assg; do
+ case "$assg" in
+ DEVNAME=*) name=${assg#*=} ;;
+ ID_BUS=*) bus=${assg#*=} ;;
+ DEVTYPE=*) devtype=${assg#*=} ;;
+ ID_TYPE=*) idtype=${assg#*=} ;;
+ esac
+ done <<EOF
+$(udevadm info --query=all --path=$p)
+EOF
+ case "$bus,$devtype,$idtype" in
+ ata,disk,disk | scsi,disk,disk) disks=${disks+$disks }$name ;;
+ esac
+done
+
+## Now go through each disk.
+for disk in $disks; do
+ set +e; smartctl -qsilent $disk; rc=$?; set -e
+ if (( $rc & 2 )); then continue; fi
+
+ if (( $rc & 8 )); then echo "W: SMART reports disk $disk failing"; fi
+
+ smartctl -A $disk |
+ while read id attr flag value worst thresh type upd when raw; do
+ case "$id" in *[!0-9]*) continue ;; ?*) ;; *) continue ;; esac
+ while :; do
+ case "$value" in 0*?) value=${value#0} ;; *) break ;; esac;
+ done
+ while :; do
+ case "$thresh" in 0*?) thresh=${thresh#0} ;; *) break ;; esac;
+ done
+ case "$attr,$raw,$when" in
+ Current_Pending_Sector,*[!0]*,*)
+ echo "W: disk $disk has $raw pending sector(s)"
+ ;;
+ Offline_Uncorrectable,*[!0]*,*)
+ echo "W: disk $disk has $raw offline-uncorrectable sector(s)"
+ ;;
+ *,*,FAILING_NOW)
+ echo "W: disk $disk attribute $attr failing (value = $raw)"
+ ;;
+ *)
+ if (( $value < $thresh )); then
+ echo "I: disk $disk attribute $attr below thresh (value = $raw)"
+ fi
+ ;;
+ esac
+ done
+done
--- /dev/null
+#! /bin/sh
+###
+### Report on scary things found in log files.
+
+set -e
+
+## Complain if Exim has left a paniclog behind.  Only the file's existence
+## is tested; its contents are not examined.
+paniclog=/var/log/exim4/paniclog
+if test -f "$paniclog"; then
+  echo "W: Exim paniclog exists. Be afraid."
+fi
--- /dev/null
+#! /usr/bin/python
+###
+### Report on available package updates, including security updates.
+
+import time as T
+import os as OS
+import errno as E
+
+import apt_pkg as APT
+import apt.cache as AC
+
+###--------------------------------------------------------------------------
+### Some utility functions.
+
+def cache_up_to_date_p():
+ """Answer whether the cache is up-to-date."""
+
+ ## I use the same stamp file as APT's standard periodic update service.
+ ## This needs prodding via a hook in the APT configuration.
+ now = T.time()
+ try:
+ last = OS.path.getmtime('/var/lib/apt/periodic/update-stamp')
+ except OSError, err:
+ if err.errno == E.ENOENT:
+ return False
+ return now - last < 86400
+
+def upgradable_packages():
+ """Return a list of packages for which updates are available."""
+ cache = AC.Cache()
+ return [pkg for pkg in cache if pkg.is_upgradable]
+
+def security_updates_p(pkg):
+ """Answer whether any update for PKG is security-relevant."""
+
+ ## There doesn't seem to be a good way of doing this. For distributions
+ ## like `testing', security updates are folded into the main distribution
+ ## after the usual triage process, so they stop looking like security
+ ## updates. Worse, for `unstable' there aren't distinct security updates
+ ## anyway: they're all just thrown into the mixer. The good way to tell
+ ## would be to fetch the changelog and look for urgent changes. Debian's
+ ## Aptitude checks explicitly for `security.debian.org'. This check at
+ ## least also captures Ubuntu.
+ for v in pkg.versions:
+ if v < pkg.installed:
+ continue
+ for o in v.origins:
+ if o.site.startswith('security.'):
+ return True
+ return False
+
+###--------------------------------------------------------------------------
+### Main program.
+
+if not cache_up_to_date_p():
+ print 'W: package cache is more than 24 hours of date'
+updates = upgradable_packages()
+if updates:
+ plural = len(updates) != 1
+ print 'I: updates available for %d %s' % \
+ (len(updates), plural and 'packages' or 'package')
+sec = [pkg for pkg in updates if security_updates_p(pkg)]
+if sec:
+ plural = len(sec) != 1
+ print 'W: security updates available for %d %s' % \
+ (len(sec), plural and 'packages' or 'package')
+
+###----- That's all, folks --------------------------------------------------
--- /dev/null
+#! /bin/sh
+###
+### Run everyday health checks on remote systems.
+
+set -e
+cd /usr/local/share/rcheck
+
+## Run the checks on each host named on the command line, in order.
+for host in "$@"; do
+
+  ## Ship `check' and `check.d' to the host as a tarball over ssh.  The
+  ## remote script makes a scratch directory under `/tmp' (trying up to 501
+  ## names, and removing it again on exit), unpacks the tarball there, and
+  ## runs `./check'.
+  ##
+  ## Plumbing: the remote output travels over fd 3 directly to the reader
+  ## below.  Local stderr, together with our own failure report, goes
+  ## through sed, which tags each line with `!!! ' before it joins the same
+  ## stream.
+  { { { set +e
+        tar czf - check check.d |
+          ssh "$host" '
+set -e
+cd /tmp
+
+i=0
+while :; do
+ tmpdir=check.$$.$i
+ if mkdir -m700 $tmpdir >/dev/null 2>&1; then break; fi
+ case $i in
+ 500) echo >&2 "failed to create temporary directory"; exit 125 ;;
+ esac
+ i=$((i + 1))
+done
+trap "cd /; rm -rf /tmp/$tmpdir" INT TERM QUIT HUP EXIT
+cd $tmpdir
+
+tar xzf -
+./check'
+        rc=$?; set -e
+        case $rc in
+          0) ;;
+          *) echo >&2 "remote check process failed (rc = $rc)" ;;
+        esac
+      } 1>&3; } 2>&1 |
+    sed 's/^/!!! /'
+  } 3>&1 |
+
+  ## Only print a banner for hosts which had something to say.  The first
+  ## line is read verbatim so leading blanks and backslashes survive.
+  if IFS= read -r line; then
+    cat <<EOF
+###--------------------------------------------------------------------------
+### $host
+
+$line
+EOF
+    cat
+  fi
+done