rcheck: A periodic remote monitoring tool.
[rcheck] / check.d / disks
1 #! /bin/bash
2 ###
3 ### Check the health of attached physical disks.
4
5 set -e
6
7 ## Build a list of actual disk devices according to their buses.
8 disks=
9 for p in /sys/class/block/*; do
10 bus=none devtype=none idtype=none
11 while read code assg; do
12 case "$assg" in
13 DEVNAME=*) name=${assg#*=} ;;
14 ID_BUS=*) bus=${assg#*=} ;;
15 DEVTYPE=*) devtype=${assg#*=} ;;
16 ID_TYPE=*) idtype=${assg#*=} ;;
17 esac
18 done <<EOF
19 $(udevadm info --query=all --path=$p)
20 EOF
21 case "$bus,$devtype,$idtype" in
22 ata,disk,disk | scsi,disk,disk) disks=${disks+$disks }$name ;;
23 esac
24 done
25
26 ## Now go through each disk.
27 for disk in $disks; do
28 set +e; smartctl -qsilent $disk; rc=$?; set -e
29 if (( $rc & 2 )); then continue; fi
30
31 if (( $rc & 8 )); then echo "W: SMART reports disk $disk failing"; fi
32
33 smartctl -A $disk |
34 while read id attr flag value worst thresh type upd when raw; do
35 case "$id" in *[!0-9]*) continue ;; ?*) ;; *) continue ;; esac
36 while :; do
37 case "$value" in 0*?) value=${value#0} ;; *) break ;; esac;
38 done
39 while :; do
40 case "$thresh" in 0*?) thresh=${thresh#0} ;; *) break ;; esac;
41 done
42 case "$attr,$raw,$when" in
43 Current_Pending_Sector,*[!0]*,*)
44 echo "W: disk $disk has $raw pending sector(s)"
45 ;;
46 Offline_Uncorrectable,*[!0]*,*)
47 echo "W: disk $disk has $raw offline-uncorrectable sector(s)"
48 ;;
49 *,*,FAILING_NOW)
50 echo "W: disk $disk attribute $attr failing (value = $raw)"
51 ;;
52 *)
53 if (( $value < $thresh )); then
54 echo "I: disk $disk attribute $attr below thresh (value = $raw)"
55 fi
56 ;;
57 esac
58 done
59 done