#!/bin/bash
# Status log that this check scans.
log="/var/log/sharpei/status.log"
# Current time as seconds since the epoch.
# NOTE(review): ts and host are not referenced in the visible part of this
# script - confirm whether they are still needed.
ts=$(date +%s)
# Fully-qualified host name with every dot replaced by an underscore.
host=$(hostname -f)
host=${host//./_}

# Command-line options (each takes one argument):
#   -w  value stored in WARN_LIMIT     (default 5)
#   -c  value stored in CRIT_LIMIT     (default 20)
#   -s  value stored in WATCH_SECONDS  (default 60)
while getopts "w:c:s:" flag; do
    case $flag in
        w) WARN_LIMIT="$OPTARG" ;;
        c) CRIT_LIMIT="$OPTARG" ;;
        s) WATCH_SECONDS="$OPTARG" ;;
    esac
done

# Start from the built-in defaults, then let any non-empty value
# (from a flag or an inherited variable) override them.
warn=5
crit=20
sec=60
if [[ -n $WARN_LIMIT ]]; then
    warn=$WARN_LIMIT
fi
if [[ -n $CRIT_LIMIT ]]; then
    crit=$CRIT_LIMIT
fi
if [[ -n $WATCH_SECONDS ]]; then
    sec=$WATCH_SECONDS
fi

# Sample log lines; awk splits them on whitespace, so the shard id is the
# 6th field in the mulcagate format and the 9th field in the tskv format.
#
# mulcagate:
# $0 = [2016-Feb-18 19:48:17.084639] status=info session=GmcBXmt4R4Y1 request=poller_worker shard=2076 host=xdb19d.mail.yandex.net role=replica smooth_state=dead states=[dead, dead, dead]
# $6 = shard=2076

# tskv:
# $0 = tskv  tskv_format=tskv-log    thread=140043747022592  unixtime=1605182447 timestamp=2020-11-12T15:00:47.926640+0300   level=notice    session_id=CAM1000BBKo1 request_id=690d42ef-3057-4e14-ad43-e688ff3af09f shard_id=76 shard_name=disk-test-commondb01 host=man-jbeo7vyxi0vzsyoj.db.yandex.net port=6432   role=replica    lag=0   smooth_state=alive  states=[alive, alive, alive]
# $9 = shard_id=76

# Count "smooth_state=dead" log records per shard over the last ${sec} seconds
# of ${log} and print a single line "<code>;<message>":
#   code 0 - nothing reached a threshold
#   code 1 - at least one shard has >= ${warn} dead records
#   code 2 - at least one shard has >= ${crit} dead records
# The message lists " <shard>: <count>," for every shard with >= ${warn}
# records, or is "OK" when there is none.
#
# Skip unreachable databases (60 flaps per minute): a shard with exactly 60
# dead records is ignored.
# NOTE(review): the value 60 is fixed and does not follow the -s option;
# confirm this is intended when the window is not 60 seconds.
#
# The template picks the timetail log format and the whitespace-separated
# field holding the shard id: mulcagate / field 6 for the prod and qa
# environments, tskv / field 9 otherwise.
{% if salt['pillar.get']('yandex:environment', 'dev') in ('prod', 'qa') %}
{% set log_format = 'mulcagate' %}
{% set shard_field = 6 %}
{% else %}
{% set log_format = 'tskv' %}
{% set shard_field = 9 %}
{% endif %}
timetail -n "${sec}" -t {{ log_format }} "${log}" |
    awk -v crit="${crit}" -v warn="${warn}" -v code=0 -v field={{ shard_field }} '
    /smooth_state=dead/ {
        # Strip the key name so only the numeric id is left,
        # e.g. "shard=2076" becomes "2076".
        gsub(/[^0-9. ]/, "", $field)
        stat[int($field)]++
    }
    END {
        for (key in stat) {
            if (stat[key] == 60) continue
            if (stat[key] >= warn) {
                msg = sprintf("%s %s: %i,", msg, key, stat[key])
                # Raise to warning only; a critical code set earlier is kept.
                if (code == 0) code = 1
            }
            if (stat[key] >= crit) code = 2
        }
        printf("%i;%s\n", code, (msg ? msg : "OK"))
    }'
