Updated
This commit is contained in:
		
							parent
							
								
									a907034a9b
								
							
						
					
					
						commit
						822720456a
					
				
							
								
								
									
										34
									
								
								log.log
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								log.log
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,34 @@
 | 
			
		||||
Mon 26 Sep 2022 16:31:23 ACST
 | 
			
		||||
==============================
 | 
			
		||||
 | 
			
		||||
checking node google1:
 | 
			
		||||
 | 
			
		||||
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
 | 
			
		||||
64 bytes from 8.8.8.8: icmp_seq=1 ttl=57 time=22.5 ms
 | 
			
		||||
64 bytes from 8.8.8.8: icmp_seq=2 ttl=57 time=22.5 ms
 | 
			
		||||
64 bytes from 8.8.8.8: icmp_seq=3 ttl=57 time=23.7 ms
 | 
			
		||||
64 bytes from 8.8.8.8: icmp_seq=4 ttl=57 time=23.0 ms
 | 
			
		||||
64 bytes from 8.8.8.8: icmp_seq=5 ttl=57 time=23.4 ms
 | 
			
		||||
 | 
			
		||||
--- 8.8.8.8 ping statistics ---
 | 
			
		||||
5 packets transmitted, 5 received, 0% packet loss, time 4006ms
 | 
			
		||||
rtt min/avg/max/mdev = 22.525/23.038/23.742/0.472 ms
 | 
			
		||||
node google1 UP
 | 
			
		||||
==============================
 | 
			
		||||
 | 
			
		||||
checking node NG-router:
 | 
			
		||||
 | 
			
		||||
PING 149.36.0.253 (149.36.0.253) 56(84) bytes of data.
 | 
			
		||||
From 217.17.208.21 icmp_seq=1 Time to live exceeded
 | 
			
		||||
From 217.17.208.21 icmp_seq=2 Time to live exceeded
 | 
			
		||||
 | 
			
		||||
--- 149.36.0.253 ping statistics ---
 | 
			
		||||
3 packets transmitted, 0 received, +2 errors, 100% packet loss, time 2000ms
 | 
			
		||||
 | 
			
		||||
node NG-router DOWN
 | 
			
		||||
==============================
 | 
			
		||||
 | 
			
		||||
Mon 26 Sep 2022 16:31:29 ACST
 | 
			
		||||
cntNodes 2
 | 
			
		||||
cntNodesDown 1
 | 
			
		||||
cntNotify 0
 | 
			
		||||
							
								
								
									
										223
									
								
								path-loss.sh
									
									
									
									
									
								
							
							
						
						
									
										223
									
								
								path-loss.sh
									
									
									
									
									
								
							@ -13,122 +13,137 @@
 | 
			
		||||
# apt-get install python-pip
 | 
			
		||||
# pip install slacker-cli
 | 
			
		||||
# #git clone https://pypi.python.org/pypi/slacker-cli/
 | 
			
		||||
while true
 | 
			
		||||
do
 | 
			
		||||
  # user defined settings
 | 
			
		||||
  txtEmail="name@example.com"
 | 
			
		||||
  txtSubject="Path Loss Report"
 | 
			
		||||
  txtSeparator="==============================\n"
 | 
			
		||||
  nTrigger=4
 | 
			
		||||
  nAttempts=5
 | 
			
		||||
  txtPdServiceKey="000000000000000"
 | 
			
		||||
  txtSlackBotToken="xoxb-250697658007-4127858946773-gyM0PMGA5XPbh3ZEYot4Yy5Z"
 | 
			
		||||
  txtSlackChannel="general"
 | 
			
		||||
 | 
			
		||||
# user defined settings
 | 
			
		||||
txtEmail="name@example.com"
 | 
			
		||||
txtSubject="Path Loss Report"
 | 
			
		||||
txtSeparator="==============================\n"
 | 
			
		||||
nTrigger=4
 | 
			
		||||
nAttempts=5
 | 
			
		||||
txtPdServiceKey="000000000000000"
 | 
			
		||||
txtSlackBotToken="xoxb-something-or-other"
 | 
			
		||||
txtSlackChannel="pathloss"
 | 
			
		||||
  declare -A nodes
 | 
			
		||||
  nodes=( \
 | 
			
		||||
    [217.17.208.20]="NG-firsthop" \
 | 
			
		||||
    [149.36.0.254]="NG-router" \
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
declare -A nodes
 | 
			
		||||
nodes=( \
 | 
			
		||||
  [8.8.8.8]="google1 member" \
 | 
			
		||||
  [8.8.4.4]="google2" \
 | 
			
		||||
  )
 | 
			
		||||
  # local variables
 | 
			
		||||
  status="|"
 | 
			
		||||
  cntNotify=0
 | 
			
		||||
  tmpLog=$(mktemp)
 | 
			
		||||
  tmpPing=$(mktemp)
 | 
			
		||||
  cntNodes=0
 | 
			
		||||
  cntNodesDown=0
 | 
			
		||||
  cntMembers=0
 | 
			
		||||
  cntMembersDown=0
 | 
			
		||||
  declare -a items
 | 
			
		||||
  echo "tmplog @ ${tmpLog}"
 | 
			
		||||
  # preload output
 | 
			
		||||
  date > ${tmpLog}
 | 
			
		||||
 | 
			
		||||
# local variables
 | 
			
		||||
status="|"
 | 
			
		||||
cntNotify=0
 | 
			
		||||
tmpLog=$(mktemp)
 | 
			
		||||
tmpPing=$(mktemp)
 | 
			
		||||
cntNodes=0
 | 
			
		||||
cntNodesDown=0
 | 
			
		||||
cntMembers=0
 | 
			
		||||
cntMembersDown=0
 | 
			
		||||
declare -a items
 | 
			
		||||
  # loop through nodes and test
 | 
			
		||||
  for node in  ${!nodes[*]}; do
 | 
			
		||||
 | 
			
		||||
# preload output
 | 
			
		||||
date > ${tmpLog}
 | 
			
		||||
    ((cntNodes++))
 | 
			
		||||
 | 
			
		||||
# loop through nodes and test
 | 
			
		||||
for node in  ${!nodes[*]}; do
 | 
			
		||||
    # split out node details
 | 
			
		||||
    # 0: alias/name
 | 
			
		||||
    # 1: optional 'member' for determining edge outage
 | 
			
		||||
    info=${nodes[${node}]}
 | 
			
		||||
    items[1]="none"
 | 
			
		||||
    ix=0
 | 
			
		||||
    for arg in ${info}; do
 | 
			
		||||
      items[ix]=${arg}
 | 
			
		||||
      ((ix++))
 | 
			
		||||
      done
 | 
			
		||||
 | 
			
		||||
  ((cntNodes++))
 | 
			
		||||
    name="${items[0]}"
 | 
			
		||||
 | 
			
		||||
  # split out node details
 | 
			
		||||
  # 0: alias/name
 | 
			
		||||
  # 1: optional 'member' for determining edge outage
 | 
			
		||||
  info=${nodes[${node}]}
 | 
			
		||||
  items[1]="none"
 | 
			
		||||
  ix=0
 | 
			
		||||
  for arg in ${info}; do
 | 
			
		||||
    items[ix]=${arg}
 | 
			
		||||
    ((ix++))
 | 
			
		||||
    done
 | 
			
		||||
 | 
			
		||||
  name="${items[0]}"
 | 
			
		||||
 | 
			
		||||
  echo -e ${txtSeparator} >> ${tmpLog};
 | 
			
		||||
  echo "checking node ${name}:" >> ${tmpLog};
 | 
			
		||||
  echo "" >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
  ping -W 1 -c ${nAttempts} ${node} > ${tmpPing}
 | 
			
		||||
  cat ${tmpPing} >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
  value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4)
 | 
			
		||||
  if [[ nTrigger -ge value ]]; then
 | 
			
		||||
    flagNxt="dn"
 | 
			
		||||
    ((cntNodesDown++))
 | 
			
		||||
  else
 | 
			
		||||
    flagNxt="up"
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  if test "member" == "${items[1]}"; then
 | 
			
		||||
    ((cntMembers++))
 | 
			
		||||
    if test "dn" = "${flagNxt}"; then ((cntMembersDown++)); fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  flagPrv="na"
 | 
			
		||||
 | 
			
		||||
  if [[ -f /tmp/pl.dn.${node} ]]; then
 | 
			
		||||
    flagPrv="dn"
 | 
			
		||||
    if test "up" = "${flagNxt}"; then
 | 
			
		||||
      rm /tmp/pl.dn.${node}
 | 
			
		||||
      fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  if [[ -f /tmp/pl.up.${node} ]]; then
 | 
			
		||||
    flagPrv="up"
 | 
			
		||||
    if test "dn" = "${flagNxt}"; then
 | 
			
		||||
      rm /tmp/pl.up.${node}
 | 
			
		||||
      fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  if test "${flagPrv}" != "${flagNxt}"; then
 | 
			
		||||
    touch /tmp/pl.${flagNxt}.${node}
 | 
			
		||||
    ((cntNotify++));
 | 
			
		||||
    mtr -w -b --report ${node} >> ${tmpLog};
 | 
			
		||||
    status="${status} ${name} ${flagPrv}>${flagNxt} |"
 | 
			
		||||
    echo -e ${txtSeparator} >> ${tmpLog};
 | 
			
		||||
    echo "checking node ${name}:" >> ${tmpLog};
 | 
			
		||||
    echo "" >> ${tmpLog}
 | 
			
		||||
    echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
    ping -W 1 -c ${nAttempts} ${node} > ${tmpPing}
 | 
			
		||||
    cat ${tmpPing} >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
    value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4)
 | 
			
		||||
    if [[ nTrigger -ge value ]]; then
 | 
			
		||||
      flagNxt="dn"
 | 
			
		||||
      ((cntNodesDown++))
 | 
			
		||||
      echo "node ${name} DOWN" >> ${tmpLog};
 | 
			
		||||
 | 
			
		||||
    else
 | 
			
		||||
      echo "node ${name} UP" >> ${tmpLog};  
 | 
			
		||||
      flagNxt="up"
 | 
			
		||||
      fi
 | 
			
		||||
 | 
			
		||||
    if test "member" == "${items[1]}"; then
 | 
			
		||||
      ((cntMembers++))
 | 
			
		||||
      if test "dn" = "${flagNxt}"; then ((cntMembersDown++)); fi
 | 
			
		||||
      fi
 | 
			
		||||
 | 
			
		||||
    flagPrv="na"
 | 
			
		||||
 | 
			
		||||
    if [[ -f /tmp/pl.dn.${node} ]]; then
 | 
			
		||||
      flagPrv="dn"
 | 
			
		||||
      if test "up" = "${flagNxt}"; then
 | 
			
		||||
        rm /tmp/pl.dn.${node}
 | 
			
		||||
        fi
 | 
			
		||||
      fi
 | 
			
		||||
 | 
			
		||||
    if [[ -f /tmp/pl.up.${node} ]]; then
 | 
			
		||||
      flagPrv="up"
 | 
			
		||||
      if test "dn" = "${flagNxt}"; then
 | 
			
		||||
        rm /tmp/pl.up.${node}
 | 
			
		||||
      fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
    if test "${flagPrv}" != "${flagNxt}"; then
 | 
			
		||||
      echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog}
 | 
			
		||||
      touch /tmp/pl.${flagNxt}.${node}
 | 
			
		||||
      ((cntNotify++));
 | 
			
		||||
      mtr -w -b --report -n ${node} >> ${tmpLog};
 | 
			
		||||
      status="${status} ${name} ${flagPrv}>${flagNxt} |"
 | 
			
		||||
      echo "" >> ${tmpLog}
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  done
 | 
			
		||||
 | 
			
		||||
# footer
 | 
			
		||||
echo -e ${txtSeparator} >> ${tmpLog}
 | 
			
		||||
date >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
# notify on failure
 | 
			
		||||
if [[ cntNodes -eq cntNodesDown ]]; then
 | 
			
		||||
  logger "path-loss - all nodes unreachable"
 | 
			
		||||
else
 | 
			
		||||
  # if something to notify
 | 
			
		||||
  if [[ cntNotify -gt 0 ]]; then
 | 
			
		||||
    # need a pagerduty alert if all important members are down
 | 
			
		||||
    if [[ cntMembers -eq cntMembersDown ]]; then
 | 
			
		||||
      response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
 | 
			
		||||
      fi
 | 
			
		||||
    # attempt an email
 | 
			
		||||
    #cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
 | 
			
		||||
    cat ${tmpLog} | /usr/local/bin/slacker -c ${txtSlackChannel} -t ${txtSlackBotToken}
 | 
			
		||||
  # footer
 | 
			
		||||
  echo -e ${txtSeparator} >> ${tmpLog}
 | 
			
		||||
  date >> ${tmpLog}
 | 
			
		||||
  echo "cntNodes ${cntNodes}" >> ${tmpLog};
 | 
			
		||||
  echo "cntNodesDown ${cntNodesDown}" >> ${tmpLog};
 | 
			
		||||
  echo "cntNotify ${cntNotify}" >> ${tmpLog};
 | 
			
		||||
  # notify on failure
 | 
			
		||||
  if [[ cntNodes -eq cntNodesDown ]]; then
 | 
			
		||||
    echo "path-loss - all nodes unreachable" >> ${tmpLog}
 | 
			
		||||
    logger "path-loss - all nodes unreachable"
 | 
			
		||||
  else
 | 
			
		||||
    # if something to notify
 | 
			
		||||
    if [[ cntNotify -gt 0 ]]; then
 | 
			
		||||
      echo "Sending an alert" >> ${tmpLog}
 | 
			
		||||
      # need a pagerduty alert if all important members are down
 | 
			
		||||
  #    if [[ cntMembers -eq cntMembersDown ]]; then
 | 
			
		||||
        #response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
 | 
			
		||||
        #fi
 | 
			
		||||
      # attempt an email
 | 
			
		||||
      #cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
 | 
			
		||||
      echo "All members down sending slack alert with  /usr/local/bin/slacker -c ${txtSlackChannel} -t ${txtSlackBotToken}" >> ${tmpLog}
 | 
			
		||||
      cat ${tmpLog} | slacker -c ${txtSlackChannel} -t ${txtSlackBotToken}
 | 
			
		||||
  #   fi
 | 
			
		||||
    fi
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
# clean up
 | 
			
		||||
rm ${tmpLog}
 | 
			
		||||
rm ${tmpPing}
 | 
			
		||||
  cp ${tmpLog} ./log.log
 | 
			
		||||
  cp ${tmpPing} ./ping.log
 | 
			
		||||
 | 
			
		||||
  # clean up
 | 
			
		||||
  rm ${tmpLog}
 | 
			
		||||
  rm ${tmpPing}
 | 
			
		||||
done
 | 
			
		||||
exit 0
 | 
			
		||||
							
								
								
									
										7
									
								
								ping.log
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								ping.log
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,7 @@
 | 
			
		||||
PING 149.36.0.253 (149.36.0.253) 56(84) bytes of data.
 | 
			
		||||
From 217.17.208.21 icmp_seq=1 Time to live exceeded
 | 
			
		||||
From 217.17.208.21 icmp_seq=2 Time to live exceeded
 | 
			
		||||
 | 
			
		||||
--- 149.36.0.253 ping statistics ---
 | 
			
		||||
3 packets transmitted, 0 received, +2 errors, 100% packet loss, time 2000ms
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										85
									
								
								pingmon.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								pingmon.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,85 @@
 | 
			
		||||
from termios import TAB3
 | 
			
		||||
import threading
 | 
			
		||||
from types import NoneType 
 | 
			
		||||
from prometheus_client import Counter
 | 
			
		||||
import json
 | 
			
		||||
from telnetlib import theNULL
 | 
			
		||||
import pingparsing
 | 
			
		||||
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
 | 
			
		||||
 | 
			
		||||
# Ping targets, keyed by a string test id.
# For each target:
#   dest                        - IP address to ping
#   name                        - human-readable label used in console output
#   packet_loss_rate_permitted  - highest packet loss (percent) tolerated
#                                 before an error is reported
#   rtt_max_permitted           - highest maximum round-trip time (ms)
#                                 tolerated before an error is reported
tests = {
    "1": {
        "dest": "66.29.128.140",
        "name": "African webserver",
        "packet_loss_rate_permitted": 50,
        "rtt_max_permitted": 150,
    },
    "2": {
        "dest": "8.8.8.8",
        "name": "Google DNS",
        "packet_loss_rate_permitted": 0,
        "rtt_max_permitted": 150,
    },
    "3": {
        "dest": "1.1.1.1",
        "name": "Cloudflare DNS",
        "packet_loss_rate_permitted": 0,
        "rtt_max_permitted": 150,
    },
}
 | 
			
		||||
 | 
			
		||||
def pinger(test_id):
    """Ping one configured target, report threshold breaches, push metrics.

    Looks up ``tests[test_id]``, sends 10 echo requests to its ``dest``,
    prints the measured packet loss and maximum RTT, prints the full parsed
    statistics as JSON when either permitted threshold is exceeded, and
    finally pushes the measurements to the Prometheus Pushgateway.

    Args:
        test_id: Key into the module-level ``tests`` dict.

    Raises:
        KeyError: If ``test_id`` is not present in ``tests``.
    """
    test = tests[test_id]
    dest = test['dest']
    name = test['name']
    rtt_max_permitted = test['rtt_max_permitted']
    packet_loss_rate_permitted = test['packet_loss_rate_permitted']

    print("Testing {} on IP {} with RTT threshold of {} and packet loss max of {}".format(
        name, dest, rtt_max_permitted, packet_loss_rate_permitted))

    ping_parser = pingparsing.PingParsing()
    transmitter = pingparsing.PingTransmitter()
    transmitter.destination = dest
    transmitter.count = 10
    transmitter.timeout = 2

    result = transmitter.ping()
    # Parse once and reuse the dict for both the checks and the JSON dump.
    data = ping_parser.parse(result).as_dict()
    packet_loss_rate = data["packet_loss_rate"]
    rtt_max = data["rtt_max"]
    notify = False

    print("Dest: {} Loss: {}% RTT: {}ms".format(dest, packet_loss_rate, rtt_max))

    # rtt_max is None when no reply was received; comparing None with a
    # number raises TypeError on Python 3, which would stop the monitor at
    # the very moment a target goes down. Total loss is still reported by
    # the packet-loss check below.
    if rtt_max is not None and rtt_max > rtt_max_permitted:
        print("ERROR: rtt_max_permitted exceeded!")
        notify = True

    # presumably packet_loss_rate can also be None when the ping output has
    # no statistics line - guarded for the same reason; verify against
    # pingparsing.
    if packet_loss_rate is not None and packet_loss_rate > packet_loss_rate_permitted:
        print("ERROR: packet_loss_rate_permitted exceeded!")
        notify = True

    if notify:
        print(json.dumps(data, indent=4))

    # A fresh registry per call so only this target's samples are pushed.
    registry = CollectorRegistry()
    if packet_loss_rate is not None:
        g_pl = Gauge('packet_loss_rate', 'Amt of packet loss', ['destination_ip'], registry=registry)
        g_pl.labels(dest).set(packet_loss_rate)

    if rtt_max is not None:
        g_rtt = Gauge('rtt_max', 'Round trip time', ['destination_ip'], registry=registry)
        g_rtt.labels(dest).set(rtt_max)

    # NOTE(review): every target pushes under the same job with no grouping
    # key; confirm that a later push does not replace the metrics pushed for
    # the previous target on the gateway.
    push_to_gateway('10.10.110.250:9091', job='cory_test_job1', registry=registry)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Run every configured test back to back, forever. Targets are probed
    # sequentially in the order they appear in `tests`.
    # NOTE: a one-thread-per-target variant (threading.Thread with
    # target=pinger) was sketched here previously but is not enabled.
    while True:
        for test_id in tests:
            pinger(test_id)
 | 
			
		||||
	
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user