Review notes (text before the first "diff --git" line is ignored by git apply):
  * The submitted patch had all newlines and indentation collapsed; line
    structure is restored here. Indentation and blank context lines in the
    pingmon.py hunks are inferred -- confirm against pingmon.py before applying.
  * "index" lines are omitted because the post-image blobs changed.
  * Fixes: the Pushgateway push now runs every cycle (it was unreachable after
    "while True"), every NSPopen is released, rtt_max is compared with
    "is not None" instead of against the string "NoneType", a None rtt_max no
    longer crashes the threshold check, both threshold breaches are reported,
    and pyroute2 is listed in requirements.txt.

diff --git a/namespace_conntrack_check.py b/namespace_conntrack_check.py
new file mode 100644
--- /dev/null
+++ b/namespace_conntrack_check.py
@@ -0,0 +1,60 @@
+"""Export per-namespace conntrack usage to a Prometheus Pushgateway.
+
+Every polling cycle reads nf_conntrack_count and nf_conntrack_max inside each
+"qrouter" network namespace and pushes both values as gauges.
+"""
+from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
+import sys, random
+import subprocess
+from pyroute2 import netns, NSPopen
+import socket
+import time
+
+PUSHGATEWAY_ADDRESS = '10.10.110.250:9091'
+PUSHGATEWAY_JOB = 'cory_test_job2'
+POLL_INTERVAL_SECONDS = 30
+
+registry = CollectorRegistry()
+
+g_count = Gauge('test_nf_conntrack_count', 'nf_conntrack_count inside given namespace', ['host','namespace'],registry=registry )
+g_max = Gauge('test_nf_conntrack_max', 'nf_conntrack_max inside given namespace', ['host','namespace'],registry=registry )
+hostname=socket.gethostname()
+
+
+def read_in_namespace(namespace, path):
+    """Return the stripped text of *path* as read from inside *namespace*."""
+    nsp = NSPopen(namespace, ['cat', path], stdout=subprocess.PIPE)
+    try:
+        output = nsp.communicate()[0]
+        nsp.wait()
+    finally:
+        # Every NSPopen must be released, or its proxy process is leaked.
+        nsp.release()
+    return output.decode('ascii').strip()
+
+
+while True:
+    for _item in netns.listnetns():
+        if not str(_item).startswith("qrouter"):
+            continue
+        nsresult_count = read_in_namespace(_item, '/proc/sys/net/netfilter/nf_conntrack_count')
+        nsresult_max = read_in_namespace(_item, '/proc/sys/net/netfilter/nf_conntrack_max')
+        print(_item + " " + nsresult_count + " " + nsresult_max)
+        try:
+            count_value = float(nsresult_count)
+            max_value = float(nsresult_max)
+        except ValueError:
+            # cat returned no number for this namespace; skip it rather than crash the loop.
+            continue
+        g_count.labels(hostname,_item).set(count_value)
+        g_max.labels(hostname,_item).set(max_value)
+
+    # Push once per cycle; a push placed after "while True" is never reached.
+    try:
+        push_to_gateway(PUSHGATEWAY_ADDRESS, job=PUSHGATEWAY_JOB, registry=registry)
+    except OSError as err:
+        # Gateway unreachable: keep polling and retry on the next cycle.
+        print("push_to_gateway failed: {}".format(err))
+    time.sleep(POLL_INTERVAL_SECONDS)
+
+# Inspect pushed metrics: curl -X GET http://10.10.110.250:9091/api/v1/metrics | jq
diff --git a/pingmon.py b/pingmon.py
--- a/pingmon.py
+++ b/pingmon.py
@@ -1,6 +1,5 @@
 from termios import TAB3
 import threading
-from types import NoneType
 from prometheus_client import Counter
 import json
 from telnetlib import theNULL
@@ -27,7 +26,27 @@ def slack_message(message, channel):
         assert e.response["error"]    # str like 'invalid_auth', 'channel_not_found'
 
 
-
+# tests={
+#     "1": {
+#         'dest': "10.90.1.254",
+#         'name': 'Runpod GW',
+#         'packet_loss_rate_permitted': 100,
+#         'rtt_max_permitted': 5,
+#     },
+#     "2": {
+#         'dest': "149.36.0.254",
+#         'name': 'Nexgen Fortigate',
+#         'packet_loss_rate_permitted': 0,
+#         'rtt_max_permitted': 5,
+#     },
+#     "3": {
+#         'dest': "217.17.208.20",
+#         'name': 'First Hop',
+#         'packet_loss_rate_permitted': 0,
+#         'rtt_max_permitted': 20,
+#     },
+
+# }
 
 tests={
 
@@ -76,16 +95,20 @@ def pinger(test_id):
     print("Dest: {} Loss: {}% RTT: {}ms".format(dest,packet_loss_rate,rtt_max))
 
-    if rtt_max>rtt_max_permitted:
-        print("ERROR: rtt_max_permitted exceeded!")
+    # Collect every breach so a simultaneous RTT and loss failure reports both.
+    errors=[]
+    # rtt_max can be None (the gauge code below guards for it); None > number raises TypeError.
+    if rtt_max is not None and rtt_max>rtt_max_permitted:
+        errors.append("ERROR: rtt_max_permitted exceeded!")
         notify=1
 
     if packet_loss_rate>packet_loss_rate_permitted:
-        print("ERROR: packet_loss_rate_permitted exceeded!")
+        errors.append("ERROR: packet_loss_rate_permitted exceeded!")
         notify=1
 
     if notify:
         issue_data=json.dumps(ping_parser.parse(result).as_dict(), indent=4)
-        slack_message(issue_data,"ng-alerts")
+        error_msg=" ".join(errors)
+        slack_message(error_msg + " " + str(tests[test_id]) + issue_data,"ng-alerts")
         print(issue_data)
 
 
@@ -94,7 +117,7 @@ def pinger(test_id):
     g_pl = Gauge('packet_loss_rate', 'Amt of packet loss', ['destination_ip'],registry=registry )
     g_pl.labels(dest).set(packet_loss_rate)
 
-    if not type(rtt_max)==NoneType:
+    if rtt_max is not None:
         g_rtt = Gauge('rtt_max', 'Round trip time', ['destination_ip'],registry=registry )
         g_rtt.labels(dest).set(rtt_max)
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+# NOTE(review): the PyPI package "slack" does not appear to be the Slack SDK; confirm
+# whether pingmon.py needs "slackclient" (import slack) or "slack_sdk" and rename.
+slack
+pingparsing
+prometheus_client
+pyroute2