#!/bin/bash
#
# Path-loss monitor.
#
# Runs forever. On each pass it pings every configured node, records the
# up/down state of each node in marker files (/tmp/pl.up.<ip>, /tmp/pl.dn.<ip>),
# and, when a node's state differs from the marker left by the previous pass,
# appends an mtr report and posts the whole pass log to Slack.
# The last pass log and last ping output are copied to ./log.log and ./ping.log.
#
# rburkholder@quovadis.bm
# raymond@burkholder.net
# http://blog.raymond.burkholder.net
#
# requires use of:
#   https://github.com/juanpabloaj/slacker-cli but install with pip
#   https://api.slack.com/bot-users
#   https://my.slack.com/apps/build/custom-integration, choose bots, create new one, and add token
#   (https://my.slack.com/admin#disabled to delete test bots)
#   apt-get install python-pip
#   pip install slacker-cli
#   #git clone https://pypi.python.org/pypi/slacker-cli/
# also calls: ping, mtr, logger, mktemp

# ---------------------------------------------------------------------------
# user defined settings
# ---------------------------------------------------------------------------

# Only referenced by the disabled e-mail / PagerDuty alternatives further down.
txtEmail="name@example.com"
txtSubject="Path Loss Report"
txtPdServiceKey="000000000000000"

txtSeparator="==============================\n"

# A node is reported DOWN when the 4th field of ping's "transmitted" summary
# line (the received-packet count in iputils ping output) is <= nTrigger.
nTrigger=4
# Number of echo requests sent to each node per pass.
nAttempts=5

# NOTE(review): a bot token is embedded in this script. Prefer exporting
# SLACK_BOT_TOKEN and rotating/removing the fallback value below.
txtSlackBotToken="${SLACK_BOT_TOKEN:-xoxb-250697658007-4127858946773-gyM0PMGA5XPbh3ZEYot4Yy5Z}"
txtSlackChannel="general"
txtSlackerBin="/home/ubuntu/.local/bin/slacker"

# Nodes to probe, keyed by IP address.
# Value: "<alias> [member]"
#   field 0: alias/name used in the report
#   field 1: optional literal 'member' for determining edge outage
declare -A nodes=(
  [217.17.208.20]="NG-firsthop"
  [149.36.0.254]="NG-router"
)

# ---------------------------------------------------------------------------
# temp-file handling
# ---------------------------------------------------------------------------

tmpLog=""
tmpPing=""

# Remove the temp files of the current pass; safe to call when none exist.
cleanup() {
  [[ -n "${tmpLog}" ]] && rm -f -- "${tmpLog}"
  [[ -n "${tmpPing}" ]] && rm -f -- "${tmpPing}"
  tmpLog=""
  tmpPing=""
}

# The loop below never ends on its own, so the usual way out is a signal;
# route signals through 'exit' so the EXIT trap removes the temp files.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# ---------------------------------------------------------------------------
# main loop: one pass over all nodes per iteration
# ---------------------------------------------------------------------------

while true; do

  # per-pass state
  status="|"          # summary of transitions, e.g. "| name dn>up |"
  cntNotify=0         # number of nodes whose state changed in this pass
  cntNodes=0
  cntNodesDown=0
  cntMembers=0        # nodes tagged 'member'
  cntMembersDown=0
  declare -a items

  tmpLog=$(mktemp) || { echo "path-loss: mktemp failed" >&2; exit 1; }
  tmpPing=$(mktemp) || { echo "path-loss: mktemp failed" >&2; exit 1; }

  echo "tmplog @ ${tmpLog}"

  # preload output
  date > "${tmpLog}"

  # loop through nodes and test
  for node in "${!nodes[@]}"; do

    ((cntNodes++))

    # split out node details (see the 'nodes' description above);
    # rebuild the array each time so nothing leaks over from the previous node
    items=()
    read -r -a items <<< "${nodes[${node}]}"
    name="${items[0]:-${node}}"
    role="${items[1]:-none}"

    echo -e "${txtSeparator}" >> "${tmpLog}"
    echo "checking node ${name}:" >> "${tmpLog}"
    echo "" >> "${tmpLog}"

    ping -W 1 -c "${nAttempts}" "${node}" > "${tmpPing}"
    cat "${tmpPing}" >> "${tmpLog}"

    # No summary line (e.g. ping could not run) counts as zero replies.
    value=$(grep -m 1 transmitted "${tmpPing}" | cut -d ' ' -f 4)
    [[ "${value}" =~ ^[0-9]+$ ]] || value=0

    if (( value <= nTrigger )); then
      flagNxt="dn"
      ((cntNodesDown++))
      echo "node ${name} DOWN" >> "${tmpLog}"
    else
      echo "node ${name} UP" >> "${tmpLog}"
      flagNxt="up"
    fi

    if [[ "${role}" == "member" ]]; then
      ((cntMembers++))
      if [[ "${flagNxt}" == "dn" ]]; then ((cntMembersDown++)); fi
    fi

    # Previous state comes from the marker file left by an earlier pass;
    # "na" means no marker exists yet. A marker that contradicts the new
    # state is removed here and the new one is created below.
    flagPrv="na"

    if [[ -f "/tmp/pl.dn.${node}" ]]; then
      flagPrv="dn"
      if [[ "${flagNxt}" == "up" ]]; then
        rm -f -- "/tmp/pl.dn.${node}"
      fi
    fi

    if [[ -f "/tmp/pl.up.${node}" ]]; then
      flagPrv="up"
      if [[ "${flagNxt}" == "dn" ]]; then
        rm -f -- "/tmp/pl.up.${node}"
      fi
    fi

    if [[ "${flagPrv}" != "${flagNxt}" ]]; then
      echo "Above State Change: ${flagPrv}>${flagNxt}" >> "${tmpLog}"
      touch "/tmp/pl.${flagNxt}.${node}"
      ((cntNotify++))
      mtr -w -b --report -n "${node}" >> "${tmpLog}"
      status="${status} ${name} ${flagPrv}>${flagNxt} |"
      echo "" >> "${tmpLog}"
    fi

  done

  # footer
  echo -e "${txtSeparator}" >> "${tmpLog}"
  date >> "${tmpLog}"
  echo "cntNodes ${cntNodes}" >> "${tmpLog}"
  echo "cntNodesDown ${cntNodesDown}" >> "${tmpLog}"
  echo "cntNotify ${cntNotify}" >> "${tmpLog}"

  # notify on failure
  if (( cntNodes == cntNodesDown )); then
    # every probe failed: only log locally, no Slack post is attempted
    echo "path-loss - all nodes unreachable" >> "${tmpLog}"
    logger "path-loss - all nodes unreachable"
  elif (( cntNotify > 0 )); then
    # something to notify
    echo "Sending an alert" >> "${tmpLog}"

    # Alert when every node tagged 'member' is down. With no tagged nodes
    # both counters are 0, so every state change is reported.
    # This must be an arithmetic test: inside [[ ]] the '>' operator compares
    # strings lexicographically and does not expand bare variable names.
    if (( cntMembers == cntMembersDown )); then

      # disabled alternatives, kept for reference:
      #response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
      #cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};

      # The token is deliberately left out of this line: the log itself is
      # what gets posted to the channel and copied to ./log.log.
      echo "All members down sending slack alert with ${txtSlackerBin} -c ${txtSlackChannel}" >> "${tmpLog}"
      "${txtSlackerBin}" -c "${txtSlackChannel}" -t "${txtSlackBotToken}" < "${tmpLog}"
    fi
  fi

  # keep a copy of the most recent pass in the current directory
  cp -- "${tmpLog}" ./log.log
  cp -- "${tmpPing}" ./ping.log

  # clean up
  cleanup

done