From ab4ff14abc8627ea48212dc4298d04f899285346 Mon Sep 17 00:00:00 2001 From: "Raymond P. Burkholder" Date: Fri, 2 Sep 2016 17:18:20 -0300 Subject: [PATCH] provide pagerduty alert --- path-loss.sh | 76 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/path-loss.sh b/path-loss.sh index 0ef5ca6..75a42a8 100644 --- a/path-loss.sh +++ b/path-loss.sh @@ -1,42 +1,78 @@ #!/bin/bash # rburkholder@quovadis.bm + # raymond@burkholder.net +# http://blog.raymond.burkholder.net + +# requires use of (with some modifications, will post): +# https://github.com/enigma-io/pd-trigger # user defined settings -txtEmail="user@example.com" +txtEmail="name@example.com" txtSubject="Path Loss Report" txtSeparator="==============================\n" nTrigger=4 nAttempts=5 +txtPdServiceKey="000000000000000" declare -A nodes nodes=( \ - [8.8.8.8]="google1" \ + [8.8.8.8]="google1 member" \ [8.8.4.4]="google2" \ ) # local variables -cntNotify=0 status="|" +cntNotify=0 tmpLog=$(mktemp) tmpPing=$(mktemp) +cntNodes=0 +cntNodesDown=0 +cntMembers=0 +cntMembersDown=0 +declare -a items # preload output date > ${tmpLog} # loop through nodes and test for node in ${!nodes[*]}; do + + ((cntNodes++)) + + # split out node details + # 0: alias/name + # 1: optional 'member' for determining edge outage + info=${nodes[${node}]} + items[1]="none" + ix=0 + for arg in ${info}; do + items[ix]=${arg} + ((ix++)) + done + + name="${items[0]}" + echo -e ${txtSeparator} >> ${tmpLog}; - echo "checking node ${nodes[${node}]}:" >> ${tmpLog}; + echo "checking node ${name}:" >> ${tmpLog}; echo "" >> ${tmpLog} ping -W 1 -c ${nAttempts} ${node} > ${tmpPing} cat ${tmpPing} >> ${tmpLog} value=$(grep transmitted ${tmpPing} | cut -d ' ' -f 4) - if [[ nTrigger -ge value ]]; then flagNxt="dn" - else flagNxt="up"; fi + if [[ nTrigger -ge value ]]; then + flagNxt="dn" + ((cntNodesDown++)) + else + flagNxt="up" + fi + + if test "member" == "${items[1]}"; then + ((cntMembers++)) + if test "dn" = "${flagNxt}"; then ((cntMembersDown++)); fi + fi flagPrv="na" @@ -44,35 +80,45 @@ for node in ${!nodes[*]}; do flagPrv="dn" if test "up" = "${flagNxt}"; then rm /tmp/pl.dn.${node} + fi fi - fi if [[ -f /tmp/pl.up.${node} ]]; then flagPrv="up" if test "dn" = "${flagNxt}"; then rm /tmp/pl.up.${node} + fi fi - fi if test "${flagPrv}" != "${flagNxt}"; then touch /tmp/pl.${flagNxt}.${node} ((cntNotify++)); mtr -w -b --report ${node} >> ${tmpLog}; - status="${status} ${nodes[${node}]} ${flagPrv}>${flagNxt} |" + status="${status} ${name} ${flagPrv}>${flagNxt} |" echo "" >> ${tmpLog} echo "Above State Change: ${flagPrv}>${flagNxt}" >> ${tmpLog} - fi + fi -done + done # footer -echo -e ${txtSeparator} >> ${tmpLog}; +echo -e ${txtSeparator} >> ${tmpLog} date >> ${tmpLog} # notify on failure -if [[ cntNotify -gt 0 ]]; then - cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail}; -fi +if [[ cntNodes -eq cntNodesDown ]]; then + logger "path-loss - all nodes unreachable" +else + # if something to notify + if [[ cntNotify -gt 0 ]]; then + # need a pagerduty alert if all important members are down + if [[ cntMembers -eq cntMembersDown ]]; then + response=$(cat ${tmpLog} | ./pd-trigger.sh -L -s "${txtPdServiceKey}" -d "${status}") + fi + # attempt an email + cat ${tmpLog} | mail -s "${txtSubject}:${status}" ${txtEmail}; + fi + fi # clean up rm ${tmpLog}