Built a script to watch the conntrack levels inside all qrouter namespaces
This commit is contained in:
		
							parent
							
								
									ee8d75b637
								
							
						
					
					
						commit
						384eab9966
					
				
							
								
								
									
										37
									
								
								namespace_conntrack_check.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								namespace_conntrack_check.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,37 @@
 | 
			
		||||
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
 | 
			
		||||
import sys, random
 | 
			
		||||
import subprocess
 | 
			
		||||
from pyroute2 import netns, NSPopen
 | 
			
		||||
import socket
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
# Dedicated registry so only the conntrack gauges below are pushed.
registry = CollectorRegistry()

# Both gauges are labelled by host and namespace so a single Pushgateway job
# can carry the values of every qrouter namespace on every node.
g_count = Gauge(
    'test_nf_conntrack_count',
    'nf_conntrack_count inside given namespace',
    ['host', 'namespace'],
    registry=registry,
)
g_max = Gauge(
    'test_nf_conntrack_max',
    'nf_conntrack_max inside given namespace',
    ['host', 'namespace'],
    registry=registry,
)
hostname = socket.gethostname()


def _read_ns_value(namespace, path):
    """Return the stripped output of ``cat path`` executed inside *namespace*.

    The NSPopen proxy is always released, even when reading fails, so the
    long-running loop does not accumulate helper processes.
    """
    nsp = NSPopen(namespace, ['cat', path], stdout=subprocess.PIPE)
    try:
        output = nsp.communicate()[0]
        nsp.wait()
    finally:
        # pyroute2 requires an explicit release() for every NSPopen object.
        nsp.release()
    return output.decode('ascii').strip()


while True:
    for _item in netns.listnetns():
        if not str(_item).startswith("qrouter"):
            continue

        nsresult_count = _read_ns_value(_item, '/proc/sys/net/netfilter/nf_conntrack_count')
        nsresult_max = _read_ns_value(_item, '/proc/sys/net/netfilter/nf_conntrack_max')

        print(_item + " " + nsresult_count + " " + nsresult_max)

        # `cat` prints nothing on stdout when the sysctl file cannot be read;
        # skip that namespace instead of crashing the whole monitor.
        try:
            count_value = float(nsresult_count)
            max_value = float(nsresult_max)
        except ValueError:
            print("Skipping " + _item + ": could not parse conntrack values")
            continue

        g_count.labels(hostname, _item).set(count_value)
        g_max.labels(hostname, _item).set(max_value)

    # Push once per scan. The push used to sit after the infinite loop and
    # was therefore never reached.
    try:
        push_to_gateway('10.10.110.250:9091', job='cory_test_job2', registry=registry)
    except OSError as err:
        # A temporarily unreachable gateway must not stop the monitoring loop.
        print("Push to gateway failed: " + str(err))

    time.sleep(30)
 | 
			
		||||
 | 
			
		||||
# print("Done")
 | 
			
		||||
# # curl -X GET http://10.10.110.250:9091/api/v1/metrics | jq
 | 
			
		||||
							
								
								
									
										31
									
								
								pingmon.py
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								pingmon.py
									
									
									
									
									
								
							@ -1,6 +1,5 @@
 | 
			
		||||
from termios import TAB3
 | 
			
		||||
import threading
 | 
			
		||||
from types import NoneType 
 | 
			
		||||
from prometheus_client import Counter
 | 
			
		||||
import json
 | 
			
		||||
from telnetlib import theNULL
 | 
			
		||||
@ -27,7 +26,27 @@ def slack_message(message, channel):
 | 
			
		||||
        assert e.response["error"]  # str like 'invalid_auth', 'channel_not_found'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# tests={
 | 
			
		||||
#     "1": {
 | 
			
		||||
#         'dest': "10.90.1.254",
 | 
			
		||||
#         'name': 'Runpod GW',
 | 
			
		||||
#         'packet_loss_rate_permitted': 100,
 | 
			
		||||
#         'rtt_max_permitted': 5,
 | 
			
		||||
#     },
 | 
			
		||||
#     "2": {
 | 
			
		||||
#         'dest': "149.36.0.254",
 | 
			
		||||
#         'name': 'Nexgen Fortigate',
 | 
			
		||||
#         'packet_loss_rate_permitted': 0,
 | 
			
		||||
#         'rtt_max_permitted': 5,
 | 
			
		||||
#     },
 | 
			
		||||
#     "3": {
 | 
			
		||||
#         'dest': "217.17.208.20",
 | 
			
		||||
#         'name': 'First Hop',
 | 
			
		||||
#         'packet_loss_rate_permitted': 0,
 | 
			
		||||
#         'rtt_max_permitted': 20,
 | 
			
		||||
#     },
 | 
			
		||||
    
 | 
			
		||||
# }
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
tests={
 | 
			
		||||
@ -76,16 +95,16 @@ def pinger(test_id):
 | 
			
		||||
 | 
			
		||||
    print("Dest: {} Loss: {}% RTT: {}ms".format(dest,packet_loss_rate,rtt_max))
 | 
			
		||||
    if rtt_max>rtt_max_permitted:
 | 
			
		||||
        print("ERROR: rtt_max_permitted exceeded!")
 | 
			
		||||
        error_msg="ERROR: rtt_max_permitted exceeded!"
 | 
			
		||||
        notify=1
 | 
			
		||||
 | 
			
		||||
    if packet_loss_rate>packet_loss_rate_permitted:
 | 
			
		||||
        print("ERROR: packet_loss_rate_permitted exceeded!")
 | 
			
		||||
        error_msg="ERROR: packet_loss_rate_permitted exceeded!"
 | 
			
		||||
        notify=1
 | 
			
		||||
 | 
			
		||||
    if notify:
 | 
			
		||||
        issue_data=json.dumps(ping_parser.parse(result).as_dict(), indent=4)
 | 
			
		||||
        slack_message(issue_data,"ng-alerts")
 | 
			
		||||
        slack_message(error_msg + " " + str(tests[test_id]) + issue_data,"ng-alerts")
 | 
			
		||||
        print(issue_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -94,7 +113,7 @@ def pinger(test_id):
 | 
			
		||||
    g_pl = Gauge('packet_loss_rate', 'Amt of packet loss', ['destination_ip'],registry=registry )
 | 
			
		||||
    g_pl.labels(dest).set(packet_loss_rate)
 | 
			
		||||
 | 
			
		||||
    if not type(rtt_max)==NoneType:
 | 
			
		||||
    if not type(rtt_max)=="NoneType":
 | 
			
		||||
        g_rtt = Gauge('rtt_max', 'Round trip time', ['destination_ip'],registry=registry )
 | 
			
		||||
        g_rtt.labels(dest).set(rtt_max)
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										3
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,3 @@
 | 
			
		||||
slack
 | 
			
		||||
pingparsing
 | 
			
		||||
prometheus_client
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user