Posted by: snednarb | February 2, 2010

Get HP Disk+Array Health from ESXi

David Ligeret wrote a Python script for monitoring the status of a server running ESXi. We are running ESXi on HP hardware and his script wouldn’t catch the disk or array failures. So the attached script is my updated version that will catch the HealthState codes from the HP hardware.


#!/usr/bin/python
#
# Script for checking global health of host running VMware ESX/ESXi
#
# Licence : GNU General Public Licence (GPL) http://www.gnu.org/
# Pre-req : pywbem
#
#@---------------------------------------------------
#@ History
#@---------------------------------------------------
#@ Date : 20080820
#@ Author : David Ligeret
#@ Reason : Initial release
#@---------------------------------------------------
#@ Date : 20080821
#@ Author : David Ligeret
#@ Reason : Add verbose mode
#@---------------------------------------------------
#@ Date : 20100202
#@ Author : Branden Schneider
#@ Reason : Added HP Support (HealthState)
#@---------------------------------------------------
#

import sys
import time
import pywbem

# CIM namespace handed to the WBEM connection
NS = "root/cimv2"

# CIM classes whose instances are enumerated; the 'HealthState' and
# 'OperationalStatus' properties of every instance are inspected
ClassesToCheck = [
    "CIM_ComputerSystem",
    "CIM_NumericSensor",
    "CIM_Memory",
    "CIM_Processor",
    "CIM_RecordLog",
    "OMC_DiscreteSensor",
    "VMware_StorageExtent",
    "VMware_Controller",
    "VMware_StorageVolume",
    "VMware_Battery",
    "VMware_SASSATAPort",
]

# process exit codes reported by the script, in increasing order of value:
# OK, Warning, Critical, Unknown
ExitOK, ExitWarning, ExitCritical, ExitUnknown = 0, 1, 2, 3

def verboseoutput(message, verbose):
    """Print a timestamped trace line when verbose mode is on.

    message -- text to print after the timestamp
    verbose -- the line is printed only when this equals 1; any other
               value makes the call a no-op

    The line has the form "YYYYMMDD HH:MM:SS message" and goes to stdout.
    """
    if verbose == 1:
        # Single parenthesised argument: valid as a Python 2 print statement
        # and as a Python 3 print() call.
        print("%s %s" % (time.strftime("%Y%m%d %H:%M:%S"), message))

# check input arguments: host URL, user and password are mandatory,
# an optional fourth argument "verbose" turns on trace output
if len(sys.argv) < 4:
    sys.stderr.write('Usage : ' + sys.argv[0] + ' hostname user password\n')
    sys.stderr.write('Example : ' + sys.argv[0] + ' https://myesxi:5989 root password\n')
    # A bad invocation says nothing about the host's health, so report it
    # as UNKNOWN instead of the value that doubles as ExitWarning.
    sys.exit(ExitUnknown)
verbose = 0
# ">= 5" so that stray trailing arguments do not silently disable verbose
if len(sys.argv) >= 5 and sys.argv[4] == "verbose":
    verbose = 1

# open the WBEM connection: argv[1] is the host URL, argv[2]/argv[3] the
# user name and password, NS the namespace
host_url = sys.argv[1]
credentials = (sys.argv[2], sys.argv[3])
verboseoutput("Connection to " + host_url, verbose)
wbemclient = pywbem.WBEMConnection(host_url, credentials, NS)

# run the check for each defined class

# HealthState value -> exit code (built once, not once per instance)
_HEALTH_STATE_EXIT = {
    0: ExitOK,         # Unknown
    5: ExitOK,         # OK
    10: ExitWarning,   # Degraded
    15: ExitWarning,   # Minor
    20: ExitCritical,  # Major
    25: ExitCritical,  # Critical
    30: ExitCritical,  # Non-recoverable Error
}

# OperationalStatus value -> exit code (built once, not once per instance)
_OPERATIONAL_STATUS_EXIT = {
    0: ExitOK,         # Unknown
    1: ExitCritical,   # Other
    2: ExitOK,         # OK
    3: ExitWarning,    # Degraded
    4: ExitWarning,    # Stressed
    5: ExitWarning,    # Predictive Failure
    6: ExitCritical,   # Error
    7: ExitCritical,   # Non-Recoverable Error
    8: ExitWarning,    # Starting
    9: ExitWarning,    # Stopping
    10: ExitCritical,  # Stopped
    11: ExitOK,        # In Service
    12: ExitWarning,   # No Contact
    13: ExitCritical,  # Lost Communication
    14: ExitCritical,  # Aborted
    15: ExitOK,        # Dormant
    16: ExitCritical,  # Supporting Entity in Error
    17: ExitOK,        # Completed
    18: ExitOK,        # Power Mode
    19: ExitOK,        # DMTF Reserved
    20: ExitOK,        # Vendor Reserved
}


def _property_or_none(instance, name):
    """Return instance[name], or None when the instance lacks that property."""
    try:
        return instance[name]
    except KeyError:
        return None


def _fold_status(table, code, element_name, global_status, exit_msg):
    """Interpret one status code and merge it into the running result.

    table         -- status-code -> exit-code mapping to consult
    code          -- status code read from the instance
    element_name  -- name used in the report line
    global_status -- worst exit code seen so far
    exit_msg      -- report text accumulated so far

    Returns the updated (global_status, exit_msg) pair.  A code missing
    from the table is treated as OK, like the reserved entries the tables
    already list, instead of aborting the whole check with a KeyError.
    """
    if code not in table:
        verboseoutput("Unmapped status code %d treated as OK" % code, verbose)
    exit_code = table.get(code, ExitOK)
    # NOTE(review): the published listing had a raw line break inside these
    # string literals; "\n" reproduces it -- confirm against the upstream
    # script, which may have used a different separator (e.g. "<br>").
    if exit_code == ExitCritical:
        verboseoutput("GLobal exit set to CRITICAL", verbose)
        return ExitCritical, exit_msg + "CRITICAL : %s\n" % element_name
    # A warning never downgrades an already critical global status.
    if exit_code == ExitWarning and global_status != ExitCritical:
        verboseoutput("GLobal exit set to WARNING", verbose)
        return ExitWarning, exit_msg + "WARNING : %s\n" % element_name
    return global_status, exit_msg


GlobalStatus = ExitOK
ExitMsg = ""
for classe in ClassesToCheck:
    verboseoutput("Check classe " + classe, verbose)
    instance_list = wbemclient.EnumerateInstances(classe)
    for instance in instance_list:
        elementName = _property_or_none(instance, 'ElementName')
        # %s formatting keeps a missing/None name from raising TypeError
        verboseoutput("Element Name = %s" % elementName, verbose)

        healthState = _property_or_none(instance, 'HealthState')
        if healthState is not None:
            verboseoutput("Element HealthState = %d" % healthState, verbose)
            GlobalStatus, ExitMsg = _fold_status(
                _HEALTH_STATE_EXIT, healthState, elementName,
                GlobalStatus, ExitMsg)

        operationalStatus = _property_or_none(instance, 'OperationalStatus')
        # Truthiness skips both None and an empty array, so the [0] below
        # cannot raise IndexError.
        if operationalStatus:
            # only the first entry of the array is evaluated
            elementStatus = operationalStatus[0]
            verboseoutput("Element Op Status = %d" % elementStatus, verbose)
            GlobalStatus, ExitMsg = _fold_status(
                _OPERATIONAL_STATUS_EXIT, elementStatus, elementName,
                GlobalStatus, ExitMsg)

# report the result: a bare "OK", or one line per failing element
if GlobalStatus == ExitOK:
    print("OK")
else:
    print(ExitMsg)
sys.exit(GlobalStatus)

See David Ligeret’s post on VMware Communities

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Categories

%d bloggers like this: