#!/usr/bin/perl

=head1 METADATA

<crontab>
    time: * * * * *
    package: scripts-switchman
    <switchman>
        group: scripts-other
    </switchman>
</crontab>

<crontab>
    time: * * * * *
    <switchman>
        group: scripts-test
    </switchman>
    package: conf-test-scripts
</crontab>

<juggler>
    host:   checks_auto.direct.yandex.ru
    ttl:            10m
    tag: direct_group_internal_systems
</juggler>
<juggler_check>
    host:   checks_auto.direct.yandex.ru
    name:           scripts.ppcReSharder.lock_max_age
    raw_events:     scripts.ppcReSharder.lock_max_age.production
    ttl:            12m
    tag: direct_group_internal_systems
    tag: direct_queues
    <notification>
        template: on_status_change
        status: OK
        status: CRIT
        method: telegram
        login: DISMonitoring
    </notification>
</juggler_check>
<juggler_check>
    host:   checks_auto.direct.yandex.ru
    name:           scripts.ppcReSharder.errors
    raw_events:     scripts.ppcReSharder.errors.production
    ttl:            12m
    tag: direct_group_internal_systems
    tag: direct_queues
    <notification>
        template: on_status_change
        status: OK
        status: CRIT
        method: telegram
        login: DISMonitoring
    </notification>
</juggler_check>


=cut


=head1 NAME

ppcReSharderMonitor.pl - Мониторинг выполнения заявок на перенос клиентов из ppcdict.reshard_queue

=cut


use strict;
use warnings;

use Yandex::Advmon;
use Yandex::HashUtils;
use Yandex::DBTools;

use my_inc "..";

use Settings;
use ScriptHelper 'Yandex::Log' => 'messages';

# Thresholds handed to juggler_check() as "crit" in main().
# max_lock_age is computed in seconds, so the lock threshold is two hours.
my $CRIT_LOCK_TIME = 2 * 60 * 60;
my $CRIT_ERRORS    = 1;

# Presumably provided by ScriptHelper; it has to run before main() -- TODO confirm.
extract_script_params();

main();
exit;


=head2 main()

    Script body: collects reshard_queue statistics, writes them to the log,
    passes them to monitor_values() under 'objects.resharding_queue' and
    reports two juggler checks (lock_max_age and errors) followed by
    juggler_ok().

=cut
sub main {
    $log->out("START");

    my $queue_stats = calc_queue_stats();
    $log->out($queue_stats);

    # The prefix override is dynamically scoped: it stays in effect until
    # main() returns.
    local $Yandex::Advmon::GRAPHITE_PREFIX = sub {
        return [ 'direct_one_min', 'db_configurations', $Settings::CONFIGURATION ];
    };
    monitor_values({ 'objects.resharding_queue' => $queue_stats });

    # One entry per juggler_check() call. Array refs (not hashes) are used so
    # that the arguments are passed in exactly the order written here.
    my @check_specs = (
        [
            service     => "scripts.ppcReSharder.lock_max_age",
            description => 'Maximum age of clients lock',
            value       => $queue_stats->{max_lock_age},
            crit        => $CRIT_LOCK_TIME,
        ],
        [
            service     => "scripts.ppcReSharder.errors",
            description => 'Number of errors in queue',
            value       => $queue_stats->{error},
            crit        => $CRIT_ERRORS,
        ],
    );
    for my $spec (@check_specs) {
        juggler_check(@$spec);
    }

    juggler_ok();
}


=head2 calc_queue_stats()

    Collect statistics about the reshard_queue table (queried via PPCDICT).

    Returns the result of hash_merge() over two single-row queries; the rows
    carry these columns:
        total        - number of rows in the queue
        new          - rows with status 'new'
        done         - rows with status 'done'
        error        - rows with status 'error'
        in_process   - rows with any other status
        max_lock_age - seconds since the oldest last_step_time among rows whose
                       status is not 'new', 'dumped', 'done' or 'error';
                       0 when there are no such rows

=cut
sub calc_queue_stats {
    # Per-status counters; IFNULL turns the NULL sums of an empty table into 0.
    my $status_counts_sql = "
                            SELECT count(*) as total
                                 , IFNULL(sum(status = 'new'),0) as new
                                 , IFNULL(sum(status = 'done'),0) as done
                                 , IFNULL(sum(status = 'error'),0) as error
                                 , IFNULL(sum(status not in ('new', 'done', 'error')),0) as in_process
                              FROM reshard_queue                            
                            ";

    # Age of the oldest step among rows being processed. Note that 'dumped'
    # rows are excluded here although they are counted as in_process above.
    my $lock_age_sql = "
                            SELECT unix_timestamp() - unix_timestamp(ifnull(min(last_step_time), now())) max_lock_age
                              FROM reshard_queue                            
                             WHERE status not in ('new', 'dumped', 'done', 'error')
                            ";

    my $status_counts = get_one_line_sql(PPCDICT, $status_counts_sql);
    my $lock_age      = get_one_line_sql(PPCDICT, $lock_age_sql);

    # A fresh hash is passed first, presumably so that neither query result
    # is modified by the merge -- TODO confirm against Yandex::HashUtils.
    return hash_merge({}, $status_counts, $lock_age);
}

