Listing 2. Load monitoring script
#!/usr/bin/ksh
###########################################################
#10/01/2003
###########################################################
#load-mon.sh - command to monitor load from cron (TEMPORARY)
#
#this script checks whether idle was under 10% for the last 200 sec
#and raises an alert; it runs at minutes 11,21,31,41,51
#This has to be done through RMC
#
# ARGUMENTS:
#
# QUIRKS
# it is just improvisation ...
#
# FUNCTIONS:
# mesgLoad
# mesgHog
# eventLoad
# eventHog
#
###########################################################
#
# REVISIONS
# Wed Oct 1 10:53:07 DFT 2003
# 1.0 First version, tested and rushed into production
# 1.1 speed up - load was oscillating
# Fri Oct 24 09:49:59 DFT 2003
# 1.1 hog monitoring too - cumulative CPU for oracle >4%
# just not a very good fix
# 1.2 Mon Jan 26 08:25:22 NFT 2004
# L modified to be able to fire oracle snap
#
###########################################################
#action on load over limit event
#
# Writes the triggering sar line through logger and mails an alert.
# Globals read:
#   L     - sar line that tripped the idle limit
#   HST   - host name used in the subject and body
#   MAILS - recipient list; deliberately unquoted so that several
#           space-separated addresses become separate mailx arguments
# Returns: 0 always.
# It has to return, not exit: the caller chains
# "mesgLoad && /rootop/fireOracleSnap.sh", and an exit here would end
# the whole script before the snapshot could be fired.
mesgLoad ()
{
  logger "Load $L"
  mailx -s "Load on $HST over 90%" $MAILS << EOF
Load on $HST over 90%
EOF
  return 0
}
#action on hog event
#mail is commented out
#
# Writes the offending process list through logger.
# Globals read:
#   H - ps lines of the processes that exceeded the hog limit
# Returns: 0 always.
# It has to return, not exit: the caller chains
# "mesgHog && /rootop/fireOracleSnap.sh", and an exit here would end
# the whole script before the snapshot could be fired.
mesgHog ()
{
  logger "Hog $H"
  #mailx -s "Possible CPU hog on $HST " $MAILS << EOF
  #Possible CPU hog on $HST
  #$H
  #EOF
  return 0
}
#load detection
#
# Prints one sar line when its 5th field is numerically below $IDLE,
# and prints nothing otherwise; the caller treats any output as "event".
# Globals read:
#   TIME - value handed to sar -i (seconds)
#   IDLE - idle limit the 5th field is compared against
# NOTE(review): the 5th field is assumed to be %idle, as in the
# "time %usr %sys %wio %idle" layout of sar -u - confirm on the target host.
# "tail -3 | head -1" selects the third line from the end of the sar
# output; presumably that is the last sample ahead of the trailing
# blank and Average lines - verify against local sar output.
eventLoad ()
{
  # The limit goes in through -v so the awk program stays a single word;
  # splicing $IDLE into the quoted text split the program in two and made
  # awk treat the " { print }" part as an input file name.
  sar -u -i"$TIME" | tail -3 | head -1 |
    awk -v idle="$IDLE" '$5+0 < idle+0 { print }'
}
#hog detection
#
# Prints the ps lines whose 3rd field (%C in the format string) is
# numerically above $HOGL and whose 1st field (%u, the user) matches
# "oracle", highest 3rd field first, with trailing blanks removed.
# Prints nothing when no process qualifies; the caller treats any
# output as "event".
# Globals read:
#   HOGL - CPU limit the 3rd field is compared against
eventHog ()
{
  # -k3 is the POSIX spelling of the obsolescent "+2" key, which newer
  # sort implementations take for a file name.
  # The limit is passed with -v rather than spliced into the awk text.
  ps -ekF "%u %p %C %x %c %a" | sort -rn -k3 |
    awk -v limit="$HOGL" '$3+0 > limit+0 && $1 ~ /oracle/ { print }' |
    sed 's/ *$//'
}
##########################################################
#main part
# The continuation lines must start in column 0 and carry no blank before
# the backslash; otherwise the value is split into several words and PATH
# is cut off at the first break.
export PATH=/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin:\
/usr/java131/jre/bin:/usr/java131/bin:/usr/lib/instl:/usr/local/bin:\
/usr/symcli/bin:/usr/lpp/Symmetrix/bin
##########################################################
### configuration part
VERSION="1.2"
#where messages are going
MAILS="root"
#if monitoring is enabled; a non-zero status from the control script
#ends the run quietly
/rootop/control_enabled.sh || exit 0
DT=$(date "+%y.%m.%d.%H:%M:%S")
HST=$(hostname)
#value handed to sar -i, in seconds (earlier notes mention 10 and
#5 minute resolutions; the value in force is 200)
#load event when idle is less than 10%
IDLE=10 ; #idle
TIME=200; #200 seconds
## sensor part and condition part - there is flooding control here
#L is non-empty only when the idle limit was crossed
L=$(eventLoad)
#long term hog - CPU over 7 - by experience
HOGL=7 ;#cumulative CPU load
#H is non-empty only when an oracle process is over the hog limit
H=$(eventHog)
## event and action part
#load event: send message, fire oracle snapshot, then exit because all
#is done; each step runs only if the previous one returned success
if test -n "$L"; then
  mesgLoad && /rootop/fireOracleSnap.sh && exit 0
fi
#hog event: send message and fire oracle snapshot
if test -n "$H"; then
  mesgHog && /rootop/fireOracleSnap.sh
fi
exit 0
|