provide pagerduty alert

This commit is contained in:
Raymond P. Burkholder 2016-09-02 17:18:20 -03:00 committed by GitHub
parent 439a4ca5d7
commit ab4ff14abc
1 changed file with 61 additions and 15 deletions

View File

@@ -1,42 +1,78 @@
#!/bin/bash #!/bin/bash
# rburkholder@quovadis.bm # rburkholder@quovadis.bm
# raymond@burkholder.net # raymond@burkholder.net
# http://blog.raymond.burkholder.net
# requires use of (with some modifications, will post):
# https://github.com/enigma-io/pd-trigger
# user defined settings # user defined settings
txtEmail="user@example.com" txtEmail="name@example.com"
txtSubject="Path Loss Report" txtSubject="Path Loss Report"
txtSeparator="==============================\n" txtSeparator="==============================\n"
nTrigger=4 nTrigger=4
nAttempts=5 nAttempts=5
txtPdServiceKey="000000000000000"
declare -A nodes declare -A nodes
nodes=( \ nodes=( \
[8.8.8.8]="google1" \ [8.8.8.8]="google1 member" \
[8.8.4.4]="google2" \ [8.8.4.4]="google2" \
) )
# local variables # local variables
cntNotify=0
status="|" status="|"
cntNotify=0
tmpLog=$(mktemp) tmpLog=$(mktemp)
tmpPing=$(mktemp) tmpPing=$(mktemp)
cntNodes=0
cntNodesDown=0
cntMembers=0
cntMembersDown=0
declare -a items
# preload output # preload output
date > ${tmpLog} date > ${tmpLog}
# loop through nodes and test # loop through nodes and test
for node in ${!nodes[*]}; do for node in ${!nodes[*]}; do
((cntNodes++))
# split out node details
# 0: alias/name
# 1: optional 'member' for determining edge outage
info=${nodes[${node}]}
items[1]="none"
ix=0
for arg in ${info}; do
items[ix]=${arg}
((ix++))
done
name="${items[0]}"
echo -e ${txtSeparator} >> ${tmpLog}; echo -e ${txtSeparator} >> ${tmpLog};
echo "checking node ${nodes[${node}]}:" >> ${tmpLog}; echo "checking node ${name}:" >> ${tmpLog};
echo "" >> ${tmpLog} echo "" >> ${tmpLog}
ping -W 1 -c ${nAttempts} ${node} > ${tmpPing} ping -W 1 -c ${nAttempts} ${node} > ${tmpPing}
cat ${tmpPing} >> ${tmpLog} cat ${tmpPing} >> ${tmpLog}
value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4) value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4)
if [[ nTrigger -ge value ]]; then flagNxt="dn" if [[ nTrigger -ge value ]]; then
else flagNxt="up"; fi flagNxt="dn"
((cntNodesDown++))
else
flagNxt="up"
fi
if test "member" == "${items[1]}"; then
((cntMembers++))
if test "dn" = "${flagNxt}"; then ((cntMembersDown++)); fi
fi
flagPrv="na" flagPrv="na"
@@ -44,35 +80,45 @@ for node in ${!nodes[*]}; do
flagPrv="dn" flagPrv="dn"
if test "up" = "${flagNxt}"; then if test "up" = "${flagNxt}"; then
rm /tmp/pl.dn.${node} rm /tmp/pl.dn.${node}
fi
fi fi
fi
if [[ -f /tmp/pl.up.${node} ]]; then if [[ -f /tmp/pl.up.${node} ]]; then
flagPrv="up" flagPrv="up"
if test "dn" = "${flagNxt}"; then if test "dn" = "${flagNxt}"; then
rm /tmp/pl.up.${node} rm /tmp/pl.up.${node}
fi
fi fi
fi
if test "${flagPrv}" != "${flagNxt}"; then if test "${flagPrv}" != "${flagNxt}"; then
touch /tmp/pl.${flagNxt}.${node} touch /tmp/pl.${flagNxt}.${node}
((cntNotify++)); ((cntNotify++));
mtr -w -b --report ${node} >> ${tmpLog}; mtr -w -b --report ${node} >> ${tmpLog};
status="${status} ${nodes[${node}]} ${flagPrv}>${flagNxt} |" status="${status} ${name} ${flagPrv}>${flagNxt} |"
echo "" >> ${tmpLog} echo "" >> ${tmpLog}
echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog} echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog}
fi fi
done done
# footer # footer
echo -e ${txtSeparator} >> ${tmpLog}; echo -e ${txtSeparator} >> ${tmpLog}
date >> ${tmpLog} date >> ${tmpLog}
# notify on failure # notify on failure
if [[ cntNotify -gt 0 ]]; then if [[ cntNodes -eq cntNodesDown ]]; then
cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail}; logger "path-loss - all nodes unreachable"
fi else
# if something to notify
if [[ cntNotify -gt 0 ]]; then
# need a pagerduty alert if all important members are down
if [[ cntMembers -eq cntMembersDown ]]; then
response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
fi
# attempt an email
cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
fi
fi
# clean up # clean up
rm ${tmpLog} rm ${tmpLog}