# ping_mon/pingmon.py
#
# 131 lines
# 3.7 KiB
# Python

from termios import TAB3
import threading
from prometheus_client import Counter
import json
from telnetlib import theNULL
import pingparsing
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
import logging
logging.basicConfig(level=logging.WARN)
import os
from slack import WebClient
from slack.errors import SlackApiError
# Slack bot token used by slack_message().
# SECURITY: a live credential must not be committed to source control. Prefer
# supplying it through the SLACK_BOT_TOKEN environment variable. The literal
# below is kept only so existing deployments keep working; it should be
# rotated and then removed.
slack_token = os.environ.get(
    "SLACK_BOT_TOKEN",
    'xoxb-250697658007-4127858946773-gyM0PMGA5XPbh3ZEYot4Yy5Z',
)
def slack_message(message, channel):
    """Post ``message`` to the Slack ``channel`` using the module-level token.

    Delivery is best-effort: when Slack rejects the request the failure is
    logged and the function returns normally, so an alerting problem does not
    stop the caller.

    Args:
        message: Text of the message to post.
        channel: Name or ID of the target channel.
    """
    client = WebClient(token=slack_token)
    try:
        client.chat_postMessage(channel=channel, text=message)
    except SlackApiError as e:
        # Raised when the API response has "ok": False. The "error" field is a
        # short code such as 'invalid_auth' or 'channel_not_found'.
        logging.error(
            "Slack message to %s failed: %s", channel, e.response["error"]
        )
# Production ping targets, keyed by test id.
# Each entry holds the destination IP, a display name, and the alert
# thresholds: maximum packet loss (percent) and maximum average RTT (ms).
tests_prod = {
    "1": {
        'dest': "10.90.1.254",
        'name': 'Runpod GW',
        # pinger() alerts only when loss is strictly greater than this value,
        # so 100 effectively turns the packet-loss alert off for this target.
        'packet_loss_rate_permitted': 100,
        'rtt_avg_permitted': 5,
    },
    "2": {
        'dest': "149.36.0.254",
        'name': 'Nexgen Fortigate',
        'packet_loss_rate_permitted': 0,
        'rtt_avg_permitted': 5,
    },
    "3": {
        'dest': "217.17.208.20",
        'name': 'First Hop',
        'packet_loss_rate_permitted': 0,
        'rtt_avg_permitted': 20,
    },
}
# Development ping targets, same schema as the production table: destination
# IP, display name, maximum packet loss (percent) and maximum average RTT (ms).
tests_dev = {
    "1": {
        'dest': "66.29.128.140",
        'name': 'African webserver',
        'packet_loss_rate_permitted': 50,
        'rtt_avg_permitted': 150,
    },
    "2": {
        'dest': "8.8.8.8",
        'name': 'Google DNS',
        'packet_loss_rate_permitted': 0,
        'rtt_avg_permitted': 150,
    },
    "3": {
        'dest': "1.1.1.1",
        'name': 'Cloudflare DNS',
        'packet_loss_rate_permitted': 0,
        'rtt_avg_permitted': 150,
    },
}

# Active target table read by pinger() and the main loop.
tests = tests_dev
def pinger(test_id):
    """Ping one configured target, alert on threshold breaches, push metrics.

    Looks up ``tests[test_id]``, sends 10 pings to its destination, prints the
    measured packet loss and average RTT, posts a Slack alert to "ng-alerts"
    when a measured value exceeds its permitted threshold, and pushes the
    measurements to the Prometheus Pushgateway.

    A measurement reported as ``None`` (for example, no average RTT because no
    reply was received) is skipped in both the threshold check and the metric
    push instead of raising ``TypeError``.

    Args:
        test_id: Key into the module-level ``tests`` table.

    Raises:
        KeyError: If ``test_id`` is not present in ``tests``.
    """
    test = tests[test_id]
    dest = test['dest']
    name = test['name']
    rtt_avg_permitted = test['rtt_avg_permitted']
    packet_loss_rate_permitted = test['packet_loss_rate_permitted']
    print("Testing {} on IP {} with RTT Avg threshold of {} and packet loss max of {}".format(
        name, dest, rtt_avg_permitted, packet_loss_rate_permitted))

    ping_parser = pingparsing.PingParsing()
    transmitter = pingparsing.PingTransmitter()
    transmitter.destination = dest
    transmitter.count = 10
    # NOTE(review): confirm the unit pingparsing applies to a bare integer
    # timeout; if it is milliseconds, 2 is far too short for these targets.
    transmitter.timeout = 2
    result = transmitter.ping()

    # Parse once and reuse the dict for checks, the alert body and metrics.
    data = ping_parser.parse(result).as_dict()
    packet_loss_rate = data["packet_loss_rate"]
    rtt_avg = data["rtt_avg"]
    print("Dest: {} Loss: {}% RTT: {}ms".format(dest, packet_loss_rate, rtt_avg))

    # Collect every breached threshold so that one alert reports all of them.
    errors = []
    if rtt_avg is not None and rtt_avg > rtt_avg_permitted:
        errors.append("ERROR: rtt_avg_permitted exceeded!")
    if packet_loss_rate is not None and packet_loss_rate > packet_loss_rate_permitted:
        errors.append("ERROR: packet_loss_rate_permitted exceeded!")

    if errors:
        issue_data = json.dumps(data, indent=4)
        slack_message(" ".join(errors) + " " + str(test) + issue_data, "ng-alerts")
        print(issue_data)

    # A fresh registry per call avoids duplicate-metric registration errors.
    registry = CollectorRegistry()
    if packet_loss_rate is not None:
        g_pl = Gauge('packet_loss_rate', 'Amt of packet loss', ['destination_ip'], registry=registry)
        g_pl.labels(dest).set(packet_loss_rate)
    if rtt_avg is not None:
        g_rtt = Gauge('rtt_avg', 'Round trip time', ['destination_ip'], registry=registry)
        g_rtt.labels(dest).set(rtt_avg)
    # NOTE(review): every target pushes under the same job with no grouping
    # key; confirm against the push_to_gateway docs whether each push replaces
    # the previous target's metrics on the gateway.
    push_to_gateway('10.10.110.250:9091', job='cory_test_job1', registry=registry)
if __name__ == "__main__":
    # Probe every configured target in turn, forever. The targets are pinged
    # sequentially; a thread-per-target variant
    # (threading.Thread(target=pinger, args=(_id,))) was tried and left disabled.
    while True:
        for _id in tests:
            pinger(_id)