#!/usr/bin/perl


=head1 METADATA

<crontab>
    env: YT_DIRECT_CLUSTER=hahn
    time: */59 * * * *
    params: --generate --no-upload
    <switchman>
        lockname: ppcMonitorStats.pl.hahn.generate
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<crontab>
    env: YT_DIRECT_CLUSTER=arnold
    time: */59 * * * *
    params: --generate --no-upload
    <switchman>
        lockname: ppcMonitorStats.pl.arnold.generate
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<crontab>
    env: YT_DIRECT_CLUSTER=prod
    time: */59 * * * *
    params: --no-generate --upload
    <switchman>
        lockname: ppcMonitorStats.pl.prod.upload
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<juggler>
    host:   checks_auto.direct.yandex.ru
    raw_events:     scripts.ppcMonitorStats.working.$yt_cluster.generate
    raw_events:     scripts.ppcMonitorStats.working.prod.upload
    vars:           yt_cluster=hahn,arnold
    ttl:            4h
    tag:            direct_yt
    tag: direct_group_internal_systems
</juggler>

<juggler_check>
    host:           checks_auto.direct.yandex.ru
    name:           scripts.ppcMonitorStats.data_freshness
    raw_events:     scripts.ppcMonitorStats.working.prod.data_freshness.production
    ttl:            4h
    tag:            direct_yt
    tag: direct_group_internal_systems
</juggler_check>

=cut

=head1 NAME

    ppcMonitorStats.pl

=head1 DESCRIPTION

    Подсчёт различных метрик в YT, выгрузка в monitor_targets

=cut

use Direct::Modern;

use Yandex::Shell;
use Yandex::DateTime;
use Yandex::YT::Table;
use Yandex::YT::TableReader;

use my_inc '..';

# Name of the YT cluster this run works with; taken from the environment,
# 'prod' when YT_DIRECT_CLUSTER is not set.
# The assignment lives in a BEGIN block because the value is interpolated into
# the import arguments of "use ScriptHelper" below (the file lock name), and
# "use" statements are executed at compile time.
my $YT_CLUSTER;
BEGIN {
    $YT_CLUSTER = $ENV{YT_DIRECT_CLUSTER} // 'prod';
}

use Settings;
use ScriptHelper 'Yandex::Log' => 'messages', get_file_lock => ['dont_die', "ppcMonitorStats.$YT_CLUSTER"];
use Tools;
use Property;
use Monitor;
use EnvTools qw/is_beta/;

# Command-line switches. Both stages are enabled by default and can be turned
# off with --no-generate / --no-upload.
my ($FORCE, $CIDS_RANGE);
my ($GENERATE, $UPLOAD) = (1, 1);
extract_script_params(
    'cids-range=s' => \$CIDS_RANGE,
    'force'        => \$FORCE,
    'generate!'    => \$GENERATE,
    'upload!'      => \$UPLOAD,
);

# On beta both working directories point to the per-user tmp/$USER node.
my ($export_dir, $tmp_dir) = is_beta()
    ? ("tmp/$ENV{USER}", "tmp/$ENV{USER}")
    : ('export', 'tmp');

# Table with the calculated metrics; written by calc_in_yt, read by upload_to_monitor_targets.
my $export_path = join '/', $export_dir, 'monitor_stats';

$log->msg_prefix("[$YT_CLUSTER]");
$log->out("start");

$log->out("setup YT environment");
Tools::force_set_yt_environment();
for my $dir ($export_dir, $tmp_dir) {
    Yandex::YT::Table->new($dir)->create("map_node");
}

calc_in_yt() if $GENERATE;

if ($UPLOAD) {
    my $upload_time = upload_to_monitor_targets();

    # The timestamp is reduced to its digits before being parsed;
    # the age of the data is then reported to juggler in days.
    my $seconds_per_day = 24 * 60 * 60;
    my $upload_digits = $upload_time =~ s/\D//gr;
    my $age_seconds = now_utc()->epoch - datetime($upload_digits)->epoch;
    my $data_age = $age_seconds / $seconds_per_day;

    $log->out("data_age: $data_age");
    juggler_check(
        value          => $data_age,
        warn           => 1.3,
        crit           => 2.3,
        service_suffix => "$YT_CLUSTER.data_freshness",
        description    => "Актуальность данных (в днях) по которым рассчитана статистика",
    );
}

$log->out("finish");


=head2 calc_in_yt

    Runs the YT job that calculates the statistics into $export_path.

    The job is skipped when the export table already exists and its upload_time
    attribute equals the upload_time of the database snapshot
    (//home/direct/db/campaigns); --force disables this check.
    After a successful run the snapshot's upload_time is stored on the export table.

    Dies if the database snapshot has no upload_time attribute.
    Takes no arguments, returns nothing meaningful.

=cut
sub calc_in_yt {
    my $db_snapshot_path = '//home/direct/db/campaigns';
    my $db_upload_time = Yandex::YT::Table->new($db_snapshot_path)->get_attribute('upload_time');

    # Without a snapshot time the freshness check below is meaningless: an undefined
    # value compares equal to the '' fallback of a missing export attribute, which
    # would report an unstamped export table as up-to-date.
    die "upload_time attribute is missing on $db_snapshot_path"
        unless defined $db_upload_time && length $db_upload_time;

    $log->out("db_upload_time=$db_upload_time");

    my $export_tbl = Yandex::YT::Table->new($export_path);
    if ($export_tbl->exists() && ($export_tbl->get_attribute('upload_time') // '') eq $db_upload_time && !$FORCE) {
        my $msg = "table $export_path already exists and up-to-date";
        $log->out($msg);
        juggler_ok(service_suffix => "$YT_CLUSTER.generate", description => $msg);
        return;
    }

    my @cmd = ('pyt', '--path' => my_inc::path('.'),
                'Direct::YT::Export::MonitorStats', 'main',
                '-v', "export_path=$export_path",
                '-v', "tmp_dir=$tmp_dir",
                # optional restriction of the calculation, passed through from --cids-range
                ($CIDS_RANGE ? (-v => "cids_range=[$CIDS_RANGE]") : ()),
    );

    $log->out("start calc in yt" => \@cmd);
    # NOTE(review): $profile is not used further; presumably it is a scope-bound
    # profiling guard that must stay alive until the sub returns - confirm.
    my $profile = Yandex::Trace::new_profile('yt:MonitorStats');
    yash_system(@cmd);

    # Remember which snapshot the table was built from, so the next run can skip the job.
    $export_tbl->set_attribute('upload_time', $db_upload_time);
    juggler_ok(service_suffix => "$YT_CLUSTER.generate", description => "successfully calculated");
    $log->out("calculation finished");
}


=head2 upload_to_monitor_targets

    Reads the metric, val and desc columns of the export table in YT and stores
    every row with Monitor::save_target_value.

    The upload is skipped (unless --force is given) when the export table's
    upload_time is not greater than the value remembered in the MonitorStats property.

    Returns the date/time the stored data corresponds to: the export table's
    upload_time after an upload, the remembered value when the upload was skipped.

=cut
sub upload_to_monitor_targets {
    my $export_time = Yandex::YT::Table->new($export_path)->get_attribute('upload_time') // '';

    my $uploaded_marker = Property->new('MonitorStats');
    my $last_upload_time = $uploaded_marker->get() // '';

    $log->out("last upload time: $last_upload_time, export time: $export_time");

    # Timestamps are compared as strings.
    my $need_upload = $FORCE || $export_time gt $last_upload_time;
    unless ($need_upload) {
        my $skip_msg = "Data is up-to-date, skip upload";
        $log->out($skip_msg);
        juggler_ok(service_suffix => "$YT_CLUSTER.upload", description => $skip_msg);
        return $last_upload_time;
    }

    my $rows = Yandex::YT::TableReader->new($export_path . '{metric,val,desc}', format => 'json');
    while (my $row = $rows->next()) {
        $log->out($row);
        my ($metric, $value, $description) = @{$row}{qw/metric val desc/};
        Monitor::save_target_value($metric, {value => $value, description => $description});
    }

    # Remember what has been uploaded so the next run can skip the same export.
    $uploaded_marker->set($export_time);
    juggler_ok(service_suffix => "$YT_CLUSTER.upload", description => "successfully uploaded to monitor_targets");
    return $export_time;
}
