ZFS Watch Script


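I have this script run from cron every 10 minutes. If the health of any zpool changes from “ONLINE” to anything else, I get an email (with the default settings, at most once every 6 hours per pool).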

#!/bin/sh
#
# ZFS pool check script, tested on FreeBSD (limited testing on Linux)
#
# run this in a cronjob every X minutes (or hours)
#
# ADJUST SETTINGS BELOW
#
# what does this do?
# checks the health of every zfs pool:
# pool health matches the expected status ("ONLINE")? do nothing.
# pool health is anything else? do this:
# check the pool's log file timestamp. log file newer than X seconds? do nothing.
# log file does not exist or is older than X seconds? do this:
# 1) send email notice that the pool may need attention
# 2) update the log file with the current timestamp
#
# You can add this to your crontab to run every few minutes or hours
# to query every 10 minutes:
# */10 * * * * /opt/scripts/zfs_watch.sh > /dev/null 2>&1
#
# (It will only email you as often as specified in this file.)
#
# Sep 4th, 2018
# * re-did script to auto check all pools.
#   (no need to specify pool on command line.)
# * added check for zpool command
#
# Aug 30th, 2018
# * moved more commands to variables
# * switched from echo to printf
#
# June 19th, 2018
# * changed it so you can add pool name on command line
#   (to make it easier to monitor multiple pools in crontab)
# * added pool name check
# * added pool name to check file
#
# May 19th, 2015
# * added more commands as variables
#
# May 15th, 2015
# * added email address option
# * added hostname check
#
# May 8th, 2015
# * first version
#
# Nicholas Caito
# [email protected]
#

# ----- first things, first -----

# Absolute paths of the helper commands used by this script.
WH="/usr/bin/whoami"
PF="/usr/bin/printf"
ZP="/sbin/zpool"

# are you root?
# The command substitution is quoted on purpose: if whoami is missing or
# fails, the comparison sees an empty string (and refuses to continue)
# instead of a malformed test expression that would let the script carry on.
if [ "$("$WH")" != "root" ]; then
   "$PF" "\nPlease run this script as root (or using sudo).\n\n" >&2
   exit 1
fi

# does zpool exist (and is it executable)?
if [ ! -x "$ZP" ]; then
   "$PF" "\nThe \"zpool\" binary was not found.\n\n" >&2
   exit 1
fi

# ----- user variables -----

# Recipient of the notice: a local user name or an email address.
# NOTE: this deliberately reuses (and overrides) the name of the standard
# USER environment variable for the remainder of the script.
USER="root"

# Minimum number of seconds between two emails about the same pool.
#   21600 = 6 hours
#   43200 = 12 hours
EMAILTIME=21600

# Directory that holds the per-pool "last email sent" log files.
FILELOC="/tmp"

# Pool health that counts as good. Usually "ONLINE".
# Debugging hint: set this to some other value to exercise the alert path.
STATUS="ONLINE"

# -----

# Mail command; the subject and the recipient are appended by the caller.
MAILCMD="/usr/bin/mail -s"

# Remaining helper commands.
ST="/usr/bin/stat"
TC="/usr/bin/touch"

# Fully qualified host name, used in the email subject.
HOST="$(/bin/hostname -f)"

# Current time in seconds since the epoch.
CURRENT="$(/bin/date +%s)"

# -----

# Remind the operator how to silence this script's output under cron.
"$PF" "Add \"> /dev/null 2>&1\" to end of command in crontab to prevent detailed output.\n"

# Collect the names of all pools, one per line (-H: no header line).
LISTPOOLS=$("$ZP" list -H -o name)

# Nothing to check when the list is empty: report it and stop successfully.
[ -n "$LISTPOOLS" ] || {
   "$PF" "No pools were found.\n"
   exit 0
}

# -----

"$PF" "Listing all pools and their current status.\n"

# Check every pool in LISTPOOLS (one pool name per line).
#
# For each pool:
#   * health equals STATUS      -> report it and move on
#   * health differs            -> look at the pool's log file in FILELOC:
#       - log file younger than EMAILTIME seconds -> stay quiet
#       - log file missing, older, or unreadable  -> mail "zpool status" to
#         USER and, if the mail command succeeded, touch the log file
#
# The list is read line by line (instead of word-splitting it in a "for"
# loop) so that a pool name containing a space stays in one piece. It is fed
# through file descriptor 3 so commands inside the loop keep the script's
# normal stdin.
while IFS= read -r POOL <&3; do

   # ignore blank lines
   [ -n "$POOL" ] || continue

   # send mail init
   SEND="NO"

   # check pool health
   HEALTH="$("$ZP" list -H -o health "$POOL")"

   # Separator line. It is passed as an argument to a fixed format: the old
   # format string started with "\-", which is not a defined printf escape
   # and is rendered differently by different printf implementations.
   "$PF" '%s\n' "----------------------------------------------------------------------"
   # Values go through %s so that a "%" or "\" in them is printed literally.
   "$PF" 'Pool: %s, Status: %s' "$POOL" "$HEALTH"

   # "=" (not "==") is the portable string comparison for [ ]; the operands
   # are quoted so an empty HEALTH (zpool failed) counts as "not good"
   # instead of being a syntax error.
   if [ "$HEALTH" = "$STATUS" ]; then
      "$PF" " - It looks good!\n"
      # do nothing
      continue
   fi

   "$PF" ' - Expecting "%s", found "%s". Not good!\n' "$STATUS" "$HEALTH"

   # --- check file ---

   # set log file
   FILE="$FILELOC/zfs-status-pool-${POOL}.log"

   "$PF" 'Pool status was not OK. Checking for log file... (%s)\n' "$FILE"

   # check if file exists
   if [ -f "$FILE" ]; then

      "$PF" "Log file exists, checking timestamp...\n"

      # Get the file's modification time in epoch seconds. Try the BSD stat
      # syntax first; if that does not yield a plain number (e.g. GNU stat,
      # where -f means something else), fall back to the GNU syntax.
      FILETIME="$("$ST" -f %Sm -t %s -n "$FILE" 2>/dev/null)"
      case "$FILETIME" in
         '' | *[!0-9]*) FILETIME="$("$ST" -c %Y "$FILE" 2>/dev/null)" ;;
      esac

      case "$FILETIME" in
         '' | *[!0-9]*)
            # No usable timestamp: err on the side of notifying.
            "$PF" "Could not read the log file timestamp. Will send email.\n"
            SEND="YES"
            ;;
         *)
            # compare time
            if [ "$((CURRENT - FILETIME))" -ge "$EMAILTIME" ]; then
               "$PF" "The log file is old! Will send email.\n"
               SEND="YES"
            else
               # log file is recent
               "$PF" "The log file is recent. Will not send email this time.\n"
            fi
            ;;
      esac

   else
      # file does not exist
      "$PF" "Log file does not exist. Will send email.\n"
      SEND="YES"
   fi
   # end file check

   # need to send email?
   if [ "$SEND" = "YES" ]; then

      # send mail
      "$PF" "Sending email...\n"

      # MAILCMD ("<mailer> -s") and USER are expanded unquoted on purpose:
      # MAILCMD must split into command + option, and USER may then hold
      # several space-separated recipients. The pipeline is run directly
      # (not inside a command substitution), so nothing the mailer prints is
      # ever executed as a command.
      # shellcheck disable=SC2086
      if "$ZP" status "$POOL" | $MAILCMD "ZFS error on $HOST!" $USER; then
         # update file
         "$PF" "Updating log file...\n"

         # update time stamp
         "$TC" "$FILE"
      else
         # Leave the log file alone so the next run tries again instead of
         # staying silent for EMAILTIME seconds.
         "$PF" "Sending email failed. Log file not updated; will retry on the next run.\n" >&2
      fi
   fi

# done checking each pool
done 3<<EOF
$LISTPOOLS
EOF

# EoF