Updated
This commit is contained in:
parent
a907034a9b
commit
822720456a
|
@ -0,0 +1,34 @@
|
||||||
|
Mon 26 Sep 2022 16:31:23 ACST
|
||||||
|
==============================
|
||||||
|
|
||||||
|
checking node google1:
|
||||||
|
|
||||||
|
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
|
||||||
|
64 bytes from 8.8.8.8: icmp_seq=1 ttl=57 time=22.5 ms
|
||||||
|
64 bytes from 8.8.8.8: icmp_seq=2 ttl=57 time=22.5 ms
|
||||||
|
64 bytes from 8.8.8.8: icmp_seq=3 ttl=57 time=23.7 ms
|
||||||
|
64 bytes from 8.8.8.8: icmp_seq=4 ttl=57 time=23.0 ms
|
||||||
|
64 bytes from 8.8.8.8: icmp_seq=5 ttl=57 time=23.4 ms
|
||||||
|
|
||||||
|
--- 8.8.8.8 ping statistics ---
|
||||||
|
5 packets transmitted, 5 received, 0% packet loss, time 4006ms
|
||||||
|
rtt min/avg/max/mdev = 22.525/23.038/23.742/0.472 ms
|
||||||
|
node google1 UP
|
||||||
|
==============================
|
||||||
|
|
||||||
|
checking node NG-router:
|
||||||
|
|
||||||
|
PING 149.36.0.253 (149.36.0.253) 56(84) bytes of data.
|
||||||
|
From 217.17.208.21 icmp_seq=1 Time to live exceeded
|
||||||
|
From 217.17.208.21 icmp_seq=2 Time to live exceeded
|
||||||
|
|
||||||
|
--- 149.36.0.253 ping statistics ---
|
||||||
|
3 packets transmitted, 0 received, +2 errors, 100% packet loss, time 2000ms
|
||||||
|
|
||||||
|
node NG-router DOWN
|
||||||
|
==============================
|
||||||
|
|
||||||
|
Mon 26 Sep 2022 16:31:29 ACST
|
||||||
|
cntNodes 2
|
||||||
|
cntNodesDown 1
|
||||||
|
cntNotify 0
|
223
path-loss.sh
223
path-loss.sh
|
@ -13,122 +13,137 @@
|
||||||
# apt-get install python-pip
|
# apt-get install python-pip
|
||||||
# pip install slacker-cli
|
# pip install slacker-cli
|
||||||
# #git clone https://pypi.python.org/pypi/slacker-cli/
|
# #git clone https://pypi.python.org/pypi/slacker-cli/
|
||||||
|
while true
|
||||||
|
do
|
||||||
|
# user defined settings
# Report addressing and formatting.
txtEmail="name@example.com"
txtSubject="Path Loss Report"
txtSeparator="==============================\n"
# A node is flagged down when the number of ping replies received is less
# than or equal to nTrigger, out of nAttempts probes sent.
nTrigger=4
nAttempts=5
txtPdServiceKey="000000000000000"
# The Slack bot token is a secret and must not be committed with the script;
# it is read from the environment instead.
# NOTE(review): a token that was previously committed here should be treated
# as exposed and rotated.
txtSlackBotToken="${PATHLOSS_SLACK_BOT_TOKEN:-}"
if [[ -z "${txtSlackBotToken}" ]]; then
  echo "path-loss: PATHLOSS_SLACK_BOT_TOKEN is not set; Slack alerts will fail" >&2
fi
txtSlackChannel="general"
|
||||||
|
|
||||||
# user defined settings
|
declare -A nodes
|
||||||
txtEmail="name@example.com"
|
nodes=( \
|
||||||
txtSubject="Path Loss Report"
|
[217.17.208.20]="NG-firsthop" \
|
||||||
txtSeparator="==============================\n"
|
[149.36.0.254]="NG-router" \
|
||||||
nTrigger=4
|
)
|
||||||
nAttempts=5
|
|
||||||
txtPdServiceKey="000000000000000"
|
|
||||||
txtSlackBotToken="xoxb-something-or-other"
|
|
||||||
txtSlackChannel="pathloss"
|
|
||||||
|
|
||||||
declare -A nodes
|
# local variables
|
||||||
nodes=( \
|
status="|"
|
||||||
[8.8.8.8]="google1 member" \
|
cntNotify=0
|
||||||
[8.8.4.4]="google2" \
|
tmpLog=$(mktemp)
|
||||||
)
|
tmpPing=$(mktemp)
|
||||||
|
cntNodes=0
|
||||||
|
cntNodesDown=0
|
||||||
|
cntMembers=0
|
||||||
|
cntMembersDown=0
|
||||||
|
declare -a items
|
||||||
|
echo "tmplog @ ${tmpLog}"
|
||||||
|
# preload output
|
||||||
|
date > ${tmpLog}
|
||||||
|
|
||||||
# local variables
|
# loop through nodes and test
|
||||||
status="|"
|
for node in ${!nodes[*]}; do
|
||||||
cntNotify=0
|
|
||||||
tmpLog=$(mktemp)
|
|
||||||
tmpPing=$(mktemp)
|
|
||||||
cntNodes=0
|
|
||||||
cntNodesDown=0
|
|
||||||
cntMembers=0
|
|
||||||
cntMembersDown=0
|
|
||||||
declare -a items
|
|
||||||
|
|
||||||
# preload output
|
((cntNodes++))
|
||||||
date > ${tmpLog}
|
|
||||||
|
|
||||||
# loop through nodes and test
|
# split out node details
|
||||||
for node in ${!nodes[*]}; do
|
# 0: alias/name
|
||||||
|
# 1: optional 'member' for determining edge outage
|
||||||
|
info=${nodes[${node}]}
|
||||||
|
items[1]="none"
|
||||||
|
ix=0
|
||||||
|
for arg in ${info}; do
|
||||||
|
items[ix]=${arg}
|
||||||
|
((ix++))
|
||||||
|
done
|
||||||
|
|
||||||
((cntNodes++))
|
name="${items[0]}"
|
||||||
|
|
||||||
# split out node details
|
echo -e ${txtSeparator} >> ${tmpLog};
|
||||||
# 0: alias/name
|
echo "checking node ${name}:" >> ${tmpLog};
|
||||||
# 1: optional 'member' for determining edge outage
|
|
||||||
info=${nodes[${node}]}
|
|
||||||
items[1]="none"
|
|
||||||
ix=0
|
|
||||||
for arg in ${info}; do
|
|
||||||
items[ix]=${arg}
|
|
||||||
((ix++))
|
|
||||||
done
|
|
||||||
|
|
||||||
name="${items[0]}"
|
|
||||||
|
|
||||||
echo -e ${txtSeparator} >> ${tmpLog};
|
|
||||||
echo "checking node ${name}:" >> ${tmpLog};
|
|
||||||
echo "" >> ${tmpLog}
|
|
||||||
|
|
||||||
ping -W 1 -c ${nAttempts} ${node} > ${tmpPing}
|
|
||||||
cat ${tmpPing} >> ${tmpLog}
|
|
||||||
|
|
||||||
value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4)
|
|
||||||
if [[ nTrigger -ge value ]]; then
|
|
||||||
flagNxt="dn"
|
|
||||||
((cntNodesDown++))
|
|
||||||
else
|
|
||||||
flagNxt="up"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if test "member" == "${items[1]}"; then
|
|
||||||
((cntMembers++))
|
|
||||||
if test "dn" = "${flagNxt}"; then ((cntMembersDown++)); fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
flagPrv="na"
|
|
||||||
|
|
||||||
if [[ -f /tmp/pl.dn.${node} ]]; then
|
|
||||||
flagPrv="dn"
|
|
||||||
if test "up" = "${flagNxt}"; then
|
|
||||||
rm /tmp/pl.dn.${node}
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -f /tmp/pl.up.${node} ]]; then
|
|
||||||
flagPrv="up"
|
|
||||||
if test "dn" = "${flagNxt}"; then
|
|
||||||
rm /tmp/pl.up.${node}
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if test "${flagPrv}" != "${flagNxt}"; then
|
|
||||||
touch /tmp/pl.${flagNxt}.${node}
|
|
||||||
((cntNotify++));
|
|
||||||
mtr -w -b --report ${node} >> ${tmpLog};
|
|
||||||
status="${status} ${name} ${flagPrv}>${flagNxt} |"
|
|
||||||
echo "" >> ${tmpLog}
|
echo "" >> ${tmpLog}
|
||||||
echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog}
|
|
||||||
|
# Probe the node and append the raw ping transcript to the report.
ping -W 1 -c "${nAttempts}" "${node}" > "${tmpPing}"
cat "${tmpPing}" >> "${tmpLog}"

# Field 4 of the "N packets transmitted, M received, ..." summary line is the
# number of replies received.
value=$(grep transmitted "${tmpPing}" | cut -d ' ' -f 4)
# Operands of -ge inside [[ ]] are evaluated arithmetically, so an empty
# ${value} (no summary line in the ping output) counts as 0 replies.
if [[ nTrigger -ge value ]]; then
  flagNxt="dn"
  ((cntNodesDown++))
  echo "node ${name} DOWN" >> "${tmpLog}"
else
  echo "node ${name} UP" >> "${tmpLog}"
  flagNxt="up"
fi

# Nodes whose second info word is "member" are counted separately.
if [[ "${items[1]}" == "member" ]]; then
  ((cntMembers++))
  if [[ "${flagNxt}" == "dn" ]]; then ((cntMembersDown++)); fi
fi

# The previous state is read from marker files /tmp/pl.<state>.<node>; a
# marker that contradicts the state just measured is removed.
flagPrv="na"

if [[ -f "/tmp/pl.dn.${node}" ]]; then
  flagPrv="dn"
  if [[ "${flagNxt}" == "up" ]]; then
    rm -- "/tmp/pl.dn.${node}"
  fi
fi

if [[ -f "/tmp/pl.up.${node}" ]]; then
  flagPrv="up"
  if [[ "${flagNxt}" == "dn" ]]; then
    rm -- "/tmp/pl.up.${node}"
  fi
fi
|
||||||
|
|
||||||
|
if test "${flagPrv}" != "${flagNxt}"; then
|
||||||
|
echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog}
|
||||||
|
touch /tmp/pl.${flagNxt}.${node}
|
||||||
|
((cntNotify++));
|
||||||
|
mtr -w -b --report -n ${node} >> ${tmpLog};
|
||||||
|
status="${status} ${name} ${flagPrv}>${flagNxt} |"
|
||||||
|
echo "" >> ${tmpLog}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
done
|
done
|
||||||
|
|
||||||
# footer
|
# footer
|
||||||
echo -e ${txtSeparator} >> ${tmpLog}
|
echo -e ${txtSeparator} >> ${tmpLog}
|
||||||
date >> ${tmpLog}
|
date >> ${tmpLog}
|
||||||
|
echo "cntNodes ${cntNodes}" >> ${tmpLog};
|
||||||
# notify on failure
|
echo "cntNodesDown ${cntNodesDown}" >> ${tmpLog};
|
||||||
if [[ cntNodes -eq cntNodesDown ]]; then
|
echo "cntNotify ${cntNotify}" >> ${tmpLog};
|
||||||
logger "path-loss - all nodes unreachable"
|
# notify on failure
|
||||||
else
|
if [[ cntNodes -eq cntNodesDown ]]; then
|
||||||
# if something to notify
|
echo "path-loss - all nodes unreachable" >> ${tmpLog}
|
||||||
if [[ cntNotify -gt 0 ]]; then
|
logger "path-loss - all nodes unreachable"
|
||||||
# need a pagerduty alert if all important members are down
|
else
|
||||||
if [[ cntMembers -eq cntMembersDown ]]; then
|
# if something to notify
|
||||||
response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
|
if [[ cntNotify -gt 0 ]]; then
|
||||||
fi
|
echo "Sending an alert" >> ${tmpLog}
|
||||||
# attempt an email
|
# need a pagerduty alert if all important members are down
|
||||||
#cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
|
# if [[ cntMembers -eq cntMembersDown ]]; then
|
||||||
cat ${tmpLog} | /usr/local/bin/slacker -c ${txtSlackChannel} -t ${txtSlackBotToken}
|
#response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
|
||||||
|
#fi
|
||||||
|
# attempt an email
|
||||||
|
#cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
|
||||||
|
echo "All members down sending slack alert with /usr/local/bin/slacker -c ${txtSlackChannel} -t ${txtSlackBotToken}" >> ${tmpLog}
|
||||||
|
cat ${tmpLog} | slacker -c ${txtSlackChannel} -t ${txtSlackBotToken}
|
||||||
|
# fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# clean up
|
cp ${tmpLog} ./log.log
|
||||||
rm ${tmpLog}
|
cp ${tmpPing} ./ping.log
|
||||||
rm ${tmpPing}
|
|
||||||
|
# clean up
|
||||||
|
rm ${tmpLog}
|
||||||
|
rm ${tmpPing}
|
||||||
|
done
|
||||||
|
exit 0
|
|
@ -0,0 +1,7 @@
|
||||||
|
PING 149.36.0.253 (149.36.0.253) 56(84) bytes of data.
|
||||||
|
From 217.17.208.21 icmp_seq=1 Time to live exceeded
|
||||||
|
From 217.17.208.21 icmp_seq=2 Time to live exceeded
|
||||||
|
|
||||||
|
--- 149.36.0.253 ping statistics ---
|
||||||
|
3 packets transmitted, 0 received, +2 errors, 100% packet loss, time 2000ms
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
from termios import TAB3
|
||||||
|
import threading
|
||||||
|
from types import NoneType
|
||||||
|
from prometheus_client import Counter
|
||||||
|
import json
|
||||||
|
from telnetlib import theNULL
|
||||||
|
import pingparsing
|
||||||
|
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
|
||||||
|
|
||||||
|
# Destinations to probe, keyed by test id.  Each entry carries:
#   dest                        - IP address to ping
#   name                        - label used in console output
#   packet_loss_rate_permitted  - loss threshold; a higher measured value is reported
#   rtt_max_permitted           - max-RTT threshold; a higher measured value is reported
tests = {
    "1": {
        'dest': "66.29.128.140",
        'name': 'African webserver',
        'packet_loss_rate_permitted': 50,
        'rtt_max_permitted': 150,
    },
    "2": {
        'dest': "8.8.8.8",
        'name': 'Google DNS',
        'packet_loss_rate_permitted': 0,
        'rtt_max_permitted': 150,
    },
    "3": {
        'dest': "1.1.1.1",
        'name': 'Cloudflare DNS',
        'packet_loss_rate_permitted': 0,
        'rtt_max_permitted': 150,
    },
}
|
||||||
|
|
||||||
|
def pinger(test_id):
    """Ping one configured destination, report threshold breaches, push metrics.

    Looks up ``tests[test_id]``, sends 10 pings with a timeout setting of 2,
    prints the measured loss and max RTT, prints an error for each permitted
    threshold that is exceeded, and pushes the measurements as gauges to the
    Prometheus push gateway.

    Args:
        test_id: key into the module-level ``tests`` table.
    """
    test = tests[test_id]
    dest = test['dest']
    name = test['name']
    rtt_max_permitted = test['rtt_max_permitted']
    packet_loss_rate_permitted = test['packet_loss_rate_permitted']

    print("Testing {} on IP {} with RTT threshold of {} and packet loss max of {}".format(
        name, dest, rtt_max_permitted, packet_loss_rate_permitted))

    ping_parser = pingparsing.PingParsing()
    transmitter = pingparsing.PingTransmitter()
    transmitter.destination = dest
    transmitter.count = 10
    transmitter.timeout = 2

    result = transmitter.ping()
    data = ping_parser.parse(result).as_dict()
    packet_loss_rate = data["packet_loss_rate"]
    rtt_max = data["rtt_max"]
    notify = False

    print("Dest: {} Loss: {}% RTT: {}ms".format(dest, packet_loss_rate, rtt_max))

    # rtt_max can be None (the gauge code below already allowed for that);
    # comparing None with an int raises TypeError, so check for it first.
    if rtt_max is not None and rtt_max > rtt_max_permitted:
        print("ERROR: rtt_max_permitted exceeded!")
        notify = True

    # NOTE(review): packet_loss_rate is presumably None only when the ping
    # output could not be parsed at all - confirm against pingparsing.
    if packet_loss_rate is None:
        print("ERROR: no packet loss statistics available!")
        notify = True
    elif packet_loss_rate > packet_loss_rate_permitted:
        print("ERROR: packet_loss_rate_permitted exceeded!")
        notify = True

    if notify:
        # Dump the full parsed result for diagnosis.
        print(json.dumps(data, indent=4))

    # A fresh registry per call, so only this destination's samples are pushed.
    registry = CollectorRegistry()

    if packet_loss_rate is not None:
        g_pl = Gauge('packet_loss_rate', 'Amt of packet loss', ['destination_ip'], registry=registry)
        g_pl.labels(dest).set(packet_loss_rate)

    if rtt_max is not None:
        g_rtt = Gauge('rtt_max', 'Round trip time', ['destination_ip'], registry=registry)
        g_rtt.labels(dest).set(rtt_max)

    push_to_gateway('10.10.110.250:9091', job='cory_test_job1', registry=registry)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run forever: each pass probes every configured destination one after
    # another, then the next pass starts straight away.
    while True:
        for test_id in tests:
            pinger(test_id)
            # Threaded variant, currently disabled:
            # t = threading.Thread(target=pinger, args=(test_id,))
            # t.start()
|
||||||
|
|
Loading…
Reference in New Issue