provide pagerduty alert
This commit is contained in:
		
							parent
							
								
									439a4ca5d7
								
							
						
					
					
						commit
						ab4ff14abc
					
				
							
								
								
									
										76
									
								
								path-loss.sh
									
									
									
									
									
								
							
							
						
						
									
										76
									
								
								path-loss.sh
									
									
									
									
									
								
							@ -1,42 +1,78 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
# rburkholder@quovadis.bm
 | 
			
		||||
 | 
			
		||||
# raymond@burkholder.net
 | 
			
		||||
# http://blog.raymond.burkholder.net
 | 
			
		||||
 | 
			
		||||
# requires the pd-trigger script (with some modifications, which will be posted):
 | 
			
		||||
#   https://github.com/enigma-io/pd-trigger
 | 
			
		||||
 | 
			
		||||
# user defined settings
 | 
			
		||||
txtEmail="user@example.com"
 | 
			
		||||
txtEmail="name@example.com"
 | 
			
		||||
txtSubject="Path Loss Report"
 | 
			
		||||
txtSeparator="==============================\n"
 | 
			
		||||
nTrigger=4
 | 
			
		||||
nAttempts=5
 | 
			
		||||
txtPdServiceKey="000000000000000"
 | 
			
		||||
 | 
			
		||||
declare -A nodes
 | 
			
		||||
nodes=( \
 | 
			
		||||
  [8.8.8.8]="google1" \
 | 
			
		||||
  [8.8.8.8]="google1 member" \
 | 
			
		||||
  [8.8.4.4]="google2" \
 | 
			
		||||
  )
 | 
			
		||||
 | 
			
		||||
# local variables
 | 
			
		||||
cntNotify=0
 | 
			
		||||
status="|"
 | 
			
		||||
cntNotify=0
 | 
			
		||||
tmpLog=$(mktemp)
 | 
			
		||||
tmpPing=$(mktemp)
 | 
			
		||||
cntNodes=0
 | 
			
		||||
cntNodesDown=0
 | 
			
		||||
cntMembers=0
 | 
			
		||||
cntMembersDown=0
 | 
			
		||||
declare -a items
 | 
			
		||||
 | 
			
		||||
# preload output
 | 
			
		||||
date > ${tmpLog}
 | 
			
		||||
 | 
			
		||||
# loop through nodes and test
 | 
			
		||||
for node in  ${!nodes[*]}; do
 | 
			
		||||
 | 
			
		||||
  ((cntNodes++))
 | 
			
		||||
 | 
			
		||||
  # split out node details
 | 
			
		||||
  # 0: alias/name
 | 
			
		||||
  # 1: optional 'member' flag; flagged nodes are counted separately to determine an edge outage
 | 
			
		||||
  info=${nodes[${node}]}
 | 
			
		||||
  items[1]="none"
 | 
			
		||||
  ix=0
 | 
			
		||||
  for arg in ${info}; do
 | 
			
		||||
    items[ix]=${arg}
 | 
			
		||||
    ((ix++))
 | 
			
		||||
    done
 | 
			
		||||
 | 
			
		||||
  name="${items[0]}"
 | 
			
		||||
 | 
			
		||||
  echo -e ${txtSeparator} >> ${tmpLog};
 | 
			
		||||
  echo "checking node ${nodes[${node}]}:" >> ${tmpLog};
 | 
			
		||||
  echo "checking node ${name}:" >> ${tmpLog};
 | 
			
		||||
  echo "" >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
  ping -W 1 -c ${nAttempts} ${node} > ${tmpPing}
 | 
			
		||||
  cat ${tmpPing} >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
  value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4)
 | 
			
		||||
  if [[ nTrigger -ge value ]]; then flagNxt="dn"
 | 
			
		||||
  else flagNxt="up"; fi
 | 
			
		||||
  if [[ nTrigger -ge value ]]; then
 | 
			
		||||
    flagNxt="dn"
 | 
			
		||||
    ((cntNodesDown++))
 | 
			
		||||
  else
 | 
			
		||||
    flagNxt="up"
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  if test "member" == "${items[1]}"; then
 | 
			
		||||
    ((cntMembers++))
 | 
			
		||||
    if test "dn" = "${flagNxt}"; then ((cntMembersDown++)); fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
  flagPrv="na"
 | 
			
		||||
 | 
			
		||||
@ -44,35 +80,45 @@ for node in  ${!nodes[*]}; do
 | 
			
		||||
    flagPrv="dn"
 | 
			
		||||
    if test "up" = "${flagNxt}"; then
 | 
			
		||||
      rm /tmp/pl.dn.${node}
 | 
			
		||||
      fi
 | 
			
		||||
    fi
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  if [[ -f /tmp/pl.up.${node} ]]; then
 | 
			
		||||
    flagPrv="up"
 | 
			
		||||
    if test "dn" = "${flagNxt}"; then
 | 
			
		||||
      rm /tmp/pl.up.${node}
 | 
			
		||||
      fi
 | 
			
		||||
    fi
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  if test "${flagPrv}" != "${flagNxt}"; then
 | 
			
		||||
    touch /tmp/pl.${flagNxt}.${node}
 | 
			
		||||
    ((cntNotify++));
 | 
			
		||||
    mtr -w -b --report ${node} >> ${tmpLog};
 | 
			
		||||
    status="${status} ${nodes[${node}]} ${flagPrv}>${flagNxt} |"
 | 
			
		||||
    status="${status} ${name} ${flagPrv}>${flagNxt} |"
 | 
			
		||||
    echo "" >> ${tmpLog}
 | 
			
		||||
    echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog}
 | 
			
		||||
  fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
done
 | 
			
		||||
  done
 | 
			
		||||
 | 
			
		||||
# footer
 | 
			
		||||
echo -e ${txtSeparator} >> ${tmpLog};
 | 
			
		||||
echo -e ${txtSeparator} >> ${tmpLog}
 | 
			
		||||
date >> ${tmpLog}
 | 
			
		||||
 | 
			
		||||
# notify on failure
 | 
			
		||||
if [[ cntNotify -gt 0 ]]; then
 | 
			
		||||
  cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
 | 
			
		||||
fi
 | 
			
		||||
if [[ cntNodes -eq cntNodesDown ]]; then
 | 
			
		||||
  logger "path-loss - all nodes unreachable"
 | 
			
		||||
else
 | 
			
		||||
  # only when at least one node changed state
 | 
			
		||||
  if [[ cntNotify -gt 0 ]]; then
 | 
			
		||||
    # trigger a pagerduty alert when every node flagged 'member' is down
    # (the counts also match when no node is flagged 'member')
 | 
			
		||||
    if [[ cntMembers -eq cntMembersDown ]]; then
 | 
			
		||||
      response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}")
 | 
			
		||||
      fi
 | 
			
		||||
    # attempt an email
 | 
			
		||||
    cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail};
 | 
			
		||||
    fi
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
# clean up
 | 
			
		||||
rm ${tmpLog}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user