I have this script run every 10 minutes. If any zpool changes from "ONLINE", I'll get an email.
#!/bin/sh
#
# ZFS pool check script, tested on FreeBSD (limited testing on Linux)
#
# run this in a cronjob every X minutes (or hours)
#
# ADJUST SETTINGS BELOW
#
# what does this do?
# checks zfs pool status:
#   pool status good? do nothing.
#   pool status bad? do this:
#     check log file timestamp. timestamp less than X? do nothing.
#     log file not exist or timestamp older than X? do this:
#       1) send email notice that pool may need attention
#       2) update log file with current timestamp
#
# You can add this to your crontab to run every few minutes or hours
# to query every 10 minutes:
# */10 * * * * /opt/scripts/zfs_watch.sh > /dev/null 2>&1
#
# (It will only email you as often as specified in this file.)
#
# Sep 4th, 2018
# * re-did script to auto check all pools.
#   (no need to specify pool on command line.)
# * added check for zpool command
#
# Aug 30th, 2018
# * moved more commands to variables
# * switched from echo to printf
#
# June 19th, 2018
# * changed it so you can add pool name on command line
#   (to make it easier to monitor multiple pools in crontab)
# * added pool name check
# * added pool name to check file
#
# May 19th, 2015
# * added more commands as variables
#
# May 15th, 2015
# * added email address option
# * added hostname check
#
# May 8th, 2015
# * first version
#
# Nicholas Caito
# [email protected]
#

# ----- first things, first -----

# set up some commands
WH="/usr/bin/whoami"
PF="/usr/bin/printf"
ZP="/sbin/zpool"

# are you root?
if [ "$("$WH")" != "root" ]; then
  "$PF" "\nPlease run this script as root (or using sudo).\n\n" >&2
  exit 1
fi

# does zpool exist?
if [ ! -x "$ZP" ]; then
  "$PF" "\nThe \"zpool\" binary was not found.\n\n" >&2
  exit 1
fi

# ----- user variables -----

# user or email address to send notice to
USER="root"

# how often should email be sent, in seconds.
# 21600 = 6 hours
# 43200 = 12 hours
EMAILTIME=21600

# location for email log file
# NOTE(review): /tmp is world-writable and the file name is predictable;
# consider pointing this at a root-owned directory instead.
FILELOC="/tmp"

# status to look for. usually "ONLINE"
# for debugging: you can change this to something else to test the script.
STATUS="ONLINE"

# -----

# other command variables
ST="/usr/bin/stat"
TC="/usr/bin/touch"

# command to send email (command plus its subject flag; it is expanded
# unquoted on purpose so that it splits into two words)
MAILCMD="/usr/bin/mail -s"

# get current timestamp (seconds since the epoch)
CURRENT="$(/bin/date +%s)"

# get hostname
HOST="$(/bin/hostname -f)"

# -----

# crontab note
"$PF" "Add \"> /dev/null 2>&1\" to end of command in crontab to prevent detailed output.\n"

# list pools (one name per line)
LISTPOOLS="$("$ZP" list -H -o name)"

# exit if there are no pools
if [ -z "$LISTPOOLS" ]; then
  "$PF" "No pools were found.\n"
  exit 0
fi

# -----

"$PF" "Listing all pools and their current status.\n"

# Read the pool list line by line instead of word-splitting it, so a pool
# name is never broken apart. The list is fed on fd 3 so that commands run
# inside the loop cannot consume it through stdin.
while IFS= read -r POOL <&3; do

  # skip blank lines defensively
  [ -n "$POOL" ] || continue

  # check pool health
  HEALTH="$("$ZP" list -H -o health "$POOL")"

  # data is passed as %s arguments, never as part of the format string
  "$PF" '%s\n' "----------------------------------------------------------------------"
  "$PF" 'Pool: %s, Status: %s' "$POOL" "$HEALTH"

  if [ "$HEALTH" = "$STATUS" ]; then
    "$PF" " - It looks good!\n"
    # healthy pool: nothing else to do
    continue
  fi

  "$PF" ' - Expecting "%s", found "%s". Not good!\n' "$STATUS" "$HEALTH"

  # --- check file ---

  # per-pool log file; its modification time records the last email sent
  FILE="$FILELOC/zfs-status-pool-${POOL}.log"
  "$PF" 'Pool status was not OK. Checking for log file... (%s)\n' "$FILE"

  SEND="NO"

  if [ -f "$FILE" ]; then
    "$PF" "Log file exists, checking timestamp...\n"

    # get file modification time as epoch seconds:
    # BSD stat syntax first, GNU coreutils syntax as fallback
    FILETIME="$("$ST" -f %m "$FILE" 2>/dev/null)" ||
      FILETIME="$("$ST" -c %Y "$FILE" 2>/dev/null)" ||
      FILETIME=""

    # if the timestamp could not be read as a number, treat the file as
    # very old so the script errs on the side of sending the alert
    case "$FILETIME" in
      '' | *[!0-9]*) FILETIME=0 ;;
    esac

    # compare time
    if [ "$((CURRENT - FILETIME))" -ge "$EMAILTIME" ]; then
      "$PF" "The log file is old! Will send email.\n"
      SEND="YES"
    else
      "$PF" "The log file is recent. Will not send email this time.\n"
    fi
  else
    "$PF" "Log file does not exist. Will send email.\n"
    SEND="YES"
  fi

  # need to send email?
  if [ "$SEND" = "YES" ]; then
    "$PF" "Sending email...\n"

    # Run the pipeline directly; it must not be wrapped in a command
    # substitution, or any output from the mailer would be executed.
    # shellcheck disable=SC2086  # MAILCMD intentionally splits into words
    if "$ZP" status "$POOL" | $MAILCMD "ZFS error on $HOST!" "$USER"; then
      # update time stamp only after a successful send, so a failed
      # delivery is retried on the next run
      "$PF" "Updating log file...\n"
      "$TC" "$FILE"
    else
      "$PF" "Sending email failed. Log file not updated; will retry next run.\n" >&2
    fi
  fi

# done checking each pool
done 3<<EOF
$LISTPOOLS
EOF

# EoF