#!/usr/bin/perl

=head1 NAME

graphite_threshold

=head1 SYNOPSIS

    graphite_threshold -c CRITICAL [OPTIONS] GRAPHITE_EXPRESSION

=head2 OPTIONS

=over 4

=item -c|-critical -w|-warn

Threshold values.

If warning value is greater than critical it is assumed that the evaluated 
value has to be greater than the warning value to pass the check.
Otherwise it is assumed that the evaluated value has to be less than the
warning value.

If one of the values is omitted, then it is assumed that the evaluated
value has to be less then the given threshold.

=item -host

Graphite host.
Defaults to ppcgraphite.yandex.ru
Also GRAPHITE_HOST environment variable can be used.

=item -span

Time span to use when retrieving data (`from` parameter in graphite API).
Defaults to 1h.

=item -sufficient

Demanded number of non-None values.
Defaults to 1.
If the nubmer of values retrieved is less than sufficient then no output is produced.

=item -variable varname=va1l,val2,val3

Iterate over values, substitute $varname in expression and calculate aggregated check,
using logical operator "AND".

=item -nodata-code {ok|warn|crit}

Defaults to "crit".
If data cannot be retrived from graphite, or there is no sufficient data
than message with nodata-code is produced.

=back

Examples:

    graphite_threshold -c 86400 'movingAverage(one_min.direct.production.direct.redirect_max_age, 600)'
    graphite_threshold -c 45800 -span 2d 'diffSeries(time("t"), keepLastValue(one_min.direct.production.direct.currency_rates_last_fetched.date))'
    graphite_threshold -w 14400 -c 28800 'movingAverage(one_min.direct.production.bs.queue_age_heavy, 300)'
    graphite_threshold -w 14400 -c 28800 -v shard=1,2 -v par=easy1,easy2,heavy1 'movingAverage(one_min.direct.devtestsharded.bs_camp_sums.shard_$shard.par_$par.max_age, 300)'

=head1 DESCRIPTION

Gets data from graphite and compares it with given thresholds.
Prints result to STDOUT.

Possible results:
    
    *No output*
    0;OK
    1;message
    2;message

=head1 SEE ALSO
    
    graphite
    juggler
    monrun

=cut

use strict;
use warnings;

use Net::INET6Glue::INET_is_INET6;
use Getopt::Long;
use List::Util qw/max/;
use List::MoreUtils qw/uniq/;
use LWP::UserAgent;
use JSON;
use URI::Escape;

my $MAX_MSG_LEN = 1024;

my ($CRIT, $WARN);
my $HOST = $ENV{GRAPHITE_HOST} || 'ppcgraphite.yandex.ru';
my $SPAN = '1h';
my $SUFFICIENT = 1;
my $VARIABLES = {};
my $VERBOSE;
my $NODATA_CODE = "crit";
my %STATUS_TO_CODE = (qr'crit' => 2, qr'warn' => 1, qr'ok' => 0);

GetOptions(
    'h|help' => \&usage,

    'crit=f' => \$CRIT,
    'warn=f' => \$WARN,

    'host=s' => \$HOST,
    'span=s' => \$SPAN,
    'sufficient=i' => \$SUFFICIENT,
    'v|variable=s' => $VARIABLES,

    'verbose' => \$VERBOSE,
    'nodata-code=s' => \$NODATA_CODE,
) or die "unsupported options";
$MAX_MSG_LEN = 1000 if $VERBOSE;

map { $NODATA_CODE =~ s/$_/$STATUS_TO_CODE{$_}/ } keys %STATUS_TO_CODE;
die "nodata code must be {ok|warn|crit}" if $NODATA_CODE =~ /\D/;

die "crit or warn must be provided" if !defined $CRIT && !defined $WARN;
die "at least one expression must be provided" if !@ARGV;

my @expressions = substitute_vars([@ARGV], $VARIABLES);

my $all_data = graphite_data($HOST, $SPAN, \@expressions);

my ($code, $msg);
if (@$all_data != @expressions) {
    ($code, $msg) = ($NODATA_CODE, "Data exists only for ".scalar(@$all_data)." series of ".scalar(@expressions)." (no data for some 'expression').");
} else {
    my %by_codes;
    for(my $i = 0; $i < @$all_data; $i++) {
        my $points = [grep {defined} map {$_->[0]} @{$all_data->[$i]->{datapoints}}];
        my ($code, $msg);
        if (defined $SUFFICIENT && scalar @$points < $SUFFICIENT) {
            ($code, $msg) = ($NODATA_CODE, "NO DATA (retrived data is less than sufficient)");
        } else {
            my $value = $points->[-1];
            ($code, $msg) = check_borders($value, $WARN, $CRIT);
        }
        if ($code) {
            push @{$by_codes{$code}}, "$msg ".format_expr($expressions[$i]);
        }
    }

    $code = max(keys %by_codes) || 0;

    if ($code) {
        my @msgs = @{$by_codes{$code}};
        my $msg_len = max(int($MAX_MSG_LEN/@msgs) - 1, 16);
        $msg = join('|', map {truncate_str($_, $msg_len)} @msgs);
    } else {
        $msg = 'OK';
    }
}

print "$code;".truncate_str($msg, $MAX_MSG_LEN);
exit 0;

sub check_borders
{
    my ($value, $warn, $crit) = @_;

    if (defined $crit && defined $warn) {
        if ($warn > $crit) {
            if ($value > $warn) {
                return 0, "OK";
            } elsif ($crit < $value && $value <= $warn) {
                return 1, "$value <= $warn";
            } elsif ($value <= $crit) {
                return 2, "$value <= $crit";
            }
        } else {
            if ($value < $warn) {
                return 0, "OK";
            } elsif ($warn <= $value && $value < $crit) {
                return 1, "$value >= $warn";
            } elsif ($value >= $crit) {
                return 2, "$value >= $crit";
            }
        }
    } elsif (defined $crit && !defined $warn) {
        if ($value < $crit) {
            return 0, "OK";
        } else {
            return 2, "$value >= $crit";
        }
    } elsif (!defined $crit && defined $warn) {
        if ($value < $warn) {
            return 0, "OK";
        } else {
            return 1, "$value >= $warn";
        }
    }
}


sub usage
{
    system("podselect -section SYNOPSIS -section DESCRIPTION -section 'SEE ALSO' $0 | pod2text-utf8 >&2");
    exit(1);
}


sub substitute_vars
{
    my @expressions = map {+{expr => $_, vars => {}}} @{shift()};
    my $vars = shift;

    for my $varname (reverse sort keys %$vars) {
        my @values = split /,/, $vars->{$varname};
        @values = ('') if !@values;
        my @new;
        for my $expr (@expressions) {
            if ($expr->{expr} =~ /\$\Q$varname\E/) {
                for my $val (@values) {
                    (my $e = $expr->{expr}) =~ s/\$\Q$varname\E/$val/g;
                    my %vars = ( %{$expr->{vars}}, $varname => $val );
                    push @new, {expr => $e, vars => \%vars};
                }
            } else {
                push @new, $expr;
            }
        }
        @expressions = @new;
    }

    return @expressions;
}


# get data from graphite
sub graphite_data
{
    my ($host, $span, $expressions) = @_;
    my $ua = LWP::UserAgent->new();
    my $url = "http://$host/render/?"
        .join("&", map {"target=".uri_escape($_->{expr})} @$expressions)
        ."&format=json&from=-$span";
    my $resp = $ua->get($url);
    unless ($resp->is_success) {
        die $url."\n".$resp->status_line."\n".$resp->decoded_content;
    }
    return decode_json $resp->decoded_content;
}


# try to get measure name from target functions
sub format_expr 
{
    my $expr = shift;

    my $vars = join ',', map {"$_=$expr->{vars}->{$_}"} sort keys %{$expr->{vars}};

    # find first target (sequence with at least two dots)
    my $targ = $expr->{expr} =~ /(\w+\.\w+\.[\w\.]+)/ ? $1 : $expr->{expr};

    return "$vars $targ";
}


sub truncate_str {
    my ($str, $len) = @_;
    $str = substr($str, 0, $len-2).'..' if length($str) > $len;
    return $str;
}
