#!/usr/bin/perl

=encoding utf8

=head1 METADATA

<crontab>
    time: */5 * * * *
    <switchman>
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<juggler>
    host:   checks_auto.direct.yandex.ru
    ttl:    30m
    tag: direct_group_internal_systems
</juggler>

# пока разные проверки для наблюдения. подумать над агрегатом (count + max_age) для каждого типа
<juggler_check>
    host:   checks_auto.direct.yandex.ru
    name:           direct.SearchQueryReport.dynamic.unready_max_age
    raw_events:     direct.SearchQueryReport.dynamic.unready_max_age.production
    ttl:            1h
    tag: direct_group_internal_systems
    tag: direct_queues
</juggler_check>
<juggler_check>
    host:   checks_auto.direct.yandex.ru
    name:           direct.SearchQueryReport.dynamic.unready_count
    raw_events:     direct.SearchQueryReport.dynamic.unready_count.production
    ttl:            1h
    tag: direct_group_internal_systems
    tag: direct_queues
</juggler_check>
<juggler_check>
    host:   checks_auto.direct.yandex.ru
    name:           direct.SearchQueryReport.performance.unready_max_age
    raw_events:     direct.SearchQueryReport.performance.unready_max_age.production
    ttl:            1h
    tag: direct_group_internal_systems
    tag: direct_queues
</juggler_check>
<juggler_check>
    host:   checks_auto.direct.yandex.ru
    name:           direct.SearchQueryReport.performance.unready_count
    raw_events:     direct.SearchQueryReport.performance.unready_count.production
    ttl:            1h
    tag: direct_group_internal_systems
    tag: direct_queues
</juggler_check>

=head1 NAME

ppcSearchQueryMonitoring.pl

=head1 DESCRIPTION

Скрипт мониторинга очереди построения оффлайн-отчетов на базе search-query

=cut

use Direct::Modern;

use List::Util qw/max/;

use Yandex::Advmon;
use Yandex::DBTools;
use Yandex::HashUtils;

use my_inc '..';

use ScriptHelper 'Yandex::Log' => 'messages';
use Settings;
use Stat::SearchQuery::Queue;

=head1 SUBROUTINES/METHODS/VARIABLES

=head2 %BORDERS_BY_TYPE

    Monitoring thresholds (count, age), grouped by report type.
        reports in the New + Process statuses are checked
        count   - number of reports
        max_age - in minutes

=cut

my %BORDERS_BY_TYPE = (
    dynamic => {
        max_age => { warn =>  3 * 60, crit => 16 * 60 },
        count   => { warn => 10,      crit => 25 },
    },
    performance => {
        max_age => { warn => 2 * 60, crit => 8 * 60 },
        count   => { warn => 10,     crit => 25 },
    },
);

# Report types to monitor: every key of the queue module's REPORT_TYPES hash.
my @types = keys %Stat::SearchQuery::Queue::REPORT_TYPES;
# Statuses for which every monitored type gets an entry (zero-filled when the DB has no rows).
my @statuses = qw(New Process Done Abandoned Deleted Error);
# Metrics kept per (type, status) pair; the names match the column aliases of the SQL query.
my @stat_fields = qw(count max_age);

# Collected statistics, laid out as $stat{<type>}{status}{<status>}{<metric>}.
my %stat;

$log->out('START');

$log->out('fetch stat');
# One row per (type, status) pair: the number of reports and the age in seconds
# of the oldest one (current UNIX timestamp minus the earliest timecreate).
my @sql_parts = (
    'SELECT type',
    ", status",
    ', UNIX_TIMESTAMP() - MIN(UNIX_TIMESTAMP(timecreate)) AS max_age',
    ', COUNT(*) AS count',
    'FROM api_queue_search_query',
    'GROUP BY 1, 2',
    'ORDER BY NULL',
);
my $data = get_all_sql(PPCDICT, \@sql_parts);

# Re-key the flat row list into $stat{<type>}{status}{<status>}, keeping only the metric columns.
for my $row (@{ $data }) {
    my ($type, $status) = @{ $row }{qw(type status)};
    $stat{$type}{status}{$status} = hash_cut($row, @stat_fields);
}

# Log the raw figures exactly as they came from the database (before defaults are filled in).
$log->out(\%stat);

# Fill in zero defaults for the type/status combinations that were not returned by the database.
for my $type (@types) {
    for my $status (@statuses) {
        $stat{$type}{status}{$status}{$_} //= 0 for @stat_fields;
    }
}

$log->out('send to graphite');
# Draw the graphs: the prefix callback builds the metric path, reading
# $Settings::CONFIGURATION at the moment it is invoked.
local $Yandex::Advmon::GRAPHITE_PREFIX = sub {
    return [
        'direct_one_min', 'db_configurations',
        $Settings::CONFIGURATION,
        'flow', 'SearchQueryStatus',
    ];
};
monitor_values(\%stat);

$log->out('check borders for monitoring');
# Compute the figures, compare them with the thresholds and raise the juggler events.
for my $type (@types) {
    # The list of types comes from %Stat::SearchQuery::Queue::REPORT_TYPES, while the thresholds
    # are maintained separately in %BORDERS_BY_TYPE. A type without thresholds would be sent to
    # juggler_check with undefined crit/warn values, so it is logged and skipped instead.
    my $borders = $BORDERS_BY_TYPE{ $type };
    if (!$borders || !$borders->{count} || !$borders->{max_age}) {
        $log->out("no monitoring borders configured for report type '$type', juggler checks skipped");
        next;
    }

    my $type_stat = $stat{ $type }->{status};
    # Only reports that are not ready yet (statuses New and Process) are taken into account.
    # The collected max_age is divided by 60 to get whole minutes, the unit of the thresholds.
    my $max_age = int(max($type_stat->{New}->{max_age}, $type_stat->{Process}->{max_age}) / 60);
    my $count = $type_stat->{New}->{count} + $type_stat->{Process}->{count};

    juggler_check(service => "direct.SearchQueryReport.$type.unready_count",
                  description => "Количество $type-отчетов, которые еще не готовы",
                  value => $count,
                  crit => $borders->{count}->{crit},
                  warn => $borders->{count}->{warn},
    );
    juggler_check(service => "direct.SearchQueryReport.$type.unready_max_age",
                  description => "Максимальный возраст (в минутах) $type-отчета, который еще не готов",
                  value => $max_age,
                  crit => $borders->{max_age}->{crit},
                  warn => $borders->{max_age}->{warn},
    );
}

# Monitor the health of the script itself.
juggler_ok();

$log->out("FINISH");
