package Direct::YT::Export::BsLoadProbability;

=head1 NAME
    
    Direct::YT::Export::BsLoadProbability

=head1 DESCRIPTION

    Обработка данных БК о "вероятности загрузки группы в показы"
    https://st.yandex-team.ru/DIRECT-61352

=head1 SYNOPSIS

    PERL5LIB=protected pyt Direct::YT::Export::BsLoadProbability create_bs_snapshot -v tmp_dir=tmp/`whoami` -v source_table=//home/yabs-cs/key_2/v2/data/1482753972705916_1/load/genocide

    PERL5LIB=protected pyt Direct::YT::Export::BsLoadProbability make_new_snapshot_and_calc_diff -v tmp_dir=tmp/`whoami` -v export_dir=tmp/`whoami`/export -v source_table=//home/yabs-cs/key_2/v2/data/1482217268676210_1/load/genocide

    # применить изменения из snapshots_diff таблицы, затем
    PERL5LIB=protected pyt Direct::YT::Export::BsLoadProbability replace_snapshot_and_remove_diff -v tmp_dir=tmp/`whoami` -v export_dir=tmp/`whoami`/export

=cut

use Direct::Modern;

use JSON;

use Yandex::YT;
use Yandex::YT::Streaming;

# Set the default format of Yandex::YT::Streaming to 'json' for this module.
$Yandex::YT::Streaming::DEFAULT_FORMAT = 'json';

# EngineID value that the extract_data mapper requires in a row's ObjectInfo.
my $DIRECT_ENGINE_ID = 7;

# $SKEY is the ObjectInfo field read from the source rows; $DKEY is the column
# name the same value is written under in the snapshot and diff tables.
# TODO: switch to ExportID / cid
my ($SKEY, $DKEY) = ('OrderID', 'OrderID');

# Columns shared by the snapshot table and the snapshots-diff table:
# both are int64 and belong to the 'for_diff' group.
my @snapshot_table_schema = map {
    +{name => $_, type => 'int64', group => 'for_diff'}
} ($DKEY, 'pid');

=head2 create_bs_snapshot

    Создает из $source_table снепшот-таблицу в $tmp_dir

=cut

# Path template of the snapshot table. Single-quoted on purpose: '$tmp_dir'
# stays literal here (presumably substituted from the job vars later — confirm).
my $bs_snapshot_table = '$tmp_dir/bs_load_probability';

# Ordered task list that builds the snapshot table from $source_table.
# It is reused as the first part of make_new_snapshot_and_calc_diff.
my @create_bs_snapshot_tasks = (
    # Create the destination table: the shared columns plus LoadProbability.
    [
        create_table => $bs_snapshot_table,
        schema       => [
            @snapshot_table_schema,
            {name => 'LoadProbability', type => 'int64', group => 'for_human'},
        ],
    ],
    # Run the extract_data mapper over two columns of the source table.
    [
        map  => 'extract_data',
        src  => '$source_table{LoadProbability,ObjectInfo}',
        dst  => $bs_snapshot_table,
        spec => join('',
            'mapper={layer_paths=["//porto_layers/yt_packages_from_banach.tar.gz"]};',
            'reducer={layer_paths=["//porto_layers/yt_packages_from_banach.tar.gz"]};',
            'scheduling_tag_filter=porto',
        ),
    ],
    # Sort by pid; the diff reducer groups both snapshots by this column.
    [
        sort => ['pid'],
        src  => $bs_snapshot_table,
        spec => 'partition_count=32',
    ],
    # Record on the snapshot table which source it was built from.
    [
        set   => $bs_snapshot_table,
        attr  => 'source_path',
        value => '$source_table',
    ],
    [
        set   => $bs_snapshot_table,
        attr  => 'source_createtime',
        value => '$source_createtime',
    ],
);

# Pattern used to validate path-like variables: word characters, '/', '_', '-'.
my $yt_path_re = qr/^[\w\/_-]+$/;

# Variables accepted by create_bs_snapshot. A value is either a validation
# regex or a hash with a 'check' regex and a 'default' value.
my %create_bs_snapshot_vars = (
    tmp_dir           => $yt_path_re,
    source_table      => $yt_path_re,
    source_createtime => {
        default => "0",
        check   => qr/^[0-9:TZ\.\-]+$/,
    },
);

# Declare the create_bs_snapshot job. The vars spec and the task list are
# passed by reference, so the job sees the very same structures that are
# declared above.
job create_bs_snapshot => (
    vars_spec => \%create_bs_snapshot_vars,
    tasks     => \@create_bs_snapshot_tasks,
);

=head2 make_new_snapshot_and_calc_diff

    Сначала делает create_bs_snapshot, затем
    создает в $export_dir таблицу с разницей между снепшот-таблицами из $export_dir (old) и $tmp_dir (только что созданной)

=cut

# Tasks executed after the fresh snapshot has been built: create the diff
# table and fill it with the diff_between_snapshots reducer.
my @snapshots_diff_tasks = (
    [
        create_table => '$export_dir/bs_load_probability_snapshots_diff',
        schema       => [
            @snapshot_table_schema,
            {name => 'is_bs_rarely_loaded', type => 'boolean', group => 'for_diff'},
        ],
    ],
    [
        reduce => 'diff_between_snapshots',
        # Input order matters: the reducer treats table index 0 as the old
        # snapshot and index 1 as the new one (presumably the index follows
        # the order in which the tables are listed here — confirm).
        src => sprintf('
                    $export_dir/bs_load_probability{%s,pid}
                    $tmp_dir/bs_load_probability{%s,pid}
                ', $DKEY, $DKEY),
        dst  => '$export_dir/bs_load_probability_snapshots_diff',
        spec => join('',
            'mapper={layer_paths=["//porto_layers/yt_packages_from_banach.tar.gz"]};',
            'reducer={layer_paths=["//porto_layers/yt_packages_from_banach.tar.gz"]};',
            'scheduling_tag_filter=porto;',
            'job_count=96',
        ),
    ],
);

# Declare the job: all create_bs_snapshot tasks followed by the diff tasks.
# It accepts the create_bs_snapshot variables plus export_dir.
job make_new_snapshot_and_calc_diff => (
    vars_spec => {
        %create_bs_snapshot_vars,
        export_dir => qr/^[\w\/_-]+$/,
    },
    tasks => [
        @create_bs_snapshot_tasks,
        @snapshots_diff_tasks,
    ],
);

=head2 replace_snapshot_and_remove_diff

    Заменяет снепшот в $export_dir снепшотом из $tmp_dir и удаляет из $export_dir таблицу с разницей между снепшотами

=cut

# Declare the job that promotes the snapshot from $tmp_dir to $export_dir and
# then removes the snapshots-diff table. Both variables are validated with the
# same path pattern.
job replace_snapshot_and_remove_diff => (
    vars_spec => {
        map { ($_ => qr/^[\w\/_-]+$/) } qw(tmp_dir export_dir)
    },
    tasks => [
        # Remove the previous snapshot before moving the new one to its path.
        [remove => '$export_dir/bs_load_probability'],
        [
            move => '',
            src  => '$tmp_dir/bs_load_probability',
            dst  => '$export_dir/bs_load_probability',
        ],
        # Remove the diff table once the snapshot has been replaced.
        [remove => '$export_dir/bs_load_probability_snapshots_diff'],
    ],
);

# Mapper: turns source rows into snapshot rows.
#
# For every input row it reads ObjectInfo and LoadProbability and emits
# { $DKEY, pid, LoadProbability } when all of the following hold:
#   * ObjectInfo.EngineID equals $DIRECT_ENGINE_ID;
#   * ObjectInfo.$SKEY and ObjectInfo.GroupExportID are true values;
#   * the probability expressed in whole percent is below 100.
# Rows that fail any check are dropped.
mapper extract_data => sub {
    my ($stream, $vars) = @_;

    ROW:
    while (my $row = $stream->get()) {
        my $object = $row->{ObjectInfo};
        # int() truncates towards zero; assumes LoadProbability is a
        # fraction in [0, 1] — TODO confirm against the source table.
        my $percent = int($row->{LoadProbability} * 100);

        next ROW unless $object->{EngineID} == $DIRECT_ENGINE_ID;
        next ROW unless $object->{$SKEY};
        next ROW unless $object->{GroupExportID};
        next ROW unless $percent < 100;

        $stream->yield({
            $DKEY           => $object->{$SKEY},
            pid             => $object->{GroupExportID},
            LoadProbability => $percent,
        });
    }
};

# Reducer: computes the difference between two snapshots, grouped by pid.
#
# Within each group, rows with table index 0 are taken as the old snapshot and
# rows with table index 1 as the new one (the last row seen per table wins).
# A pid present in both snapshots produces no output. Otherwise one record is
# emitted with is_bs_rarely_loaded set to true when the pid is only in the new
# snapshot and to false when it is only in the old one.
reducer diff_between_snapshots => (
    reduceby => 'pid',
    sub {
        my ($stream, $vars) = @_;

        GROUP:
        while (my $group = $stream->get_group()) {
            my ($old_row, $new_row);

            while (my $row = $stream->get()) {
                # The index of the input table the current row came from is
                # read from the stream's attributes.
                my $table_index = $stream->{_attrs}->{table_index};
                if    ($table_index == 0) { $old_row = $row }
                elsif ($table_index == 1) { $new_row = $row }
            }

            # Present in both snapshots: nothing changed for this pid.
            next GROUP if $old_row && $new_row;

            my $source_row = $new_row // $old_row;
            my $is_rare    = $new_row ? JSON::true : JSON::false;

            $stream->yield({
                pid                 => $source_row->{pid},
                $DKEY               => $source_row->{$DKEY},
                is_bs_rarely_loaded => $is_rare,
            });
        }
    },
);

1;

