#!/usr/bin/perl

=encoding UTF-8

=head1 DESCRIPTION

  Скрипт заливает пейджи Дистрибуции на основе YT таблицы //home/yabs/dict/EditPage

  # заливает из подготовленной таблицы
  ./bin/oneshots/PI-27618_add_distribution_pages.pl --over_logs  --ticket=PI-NNNN --yt_table='//home/partner/tmp/zurom/PI-29599/old_distribution_pages_from_edit_page_1'

  # заливает все пейджи Дистрибуции, которых нет в ПИ, обновленные после <update_time>
  ./bin/oneshots/PI-27618_add_distribution_pages.pl --over_logs  --ticket=PI-NNNN --update_time='1970-01-01 00:00:00'

=cut

use strict;
use warnings;

use File::Basename qw( basename );
use File::stat;

use lib::abs qw(../../lib);
use qbit;
use QBit::StringUtils;
use Utils::Logger qw(WARN WARNF);

use Utils::ScriptWrapper 'oneshot';

use Exception::API::HTTP;

# Local file that holds the rows read from YT; its name is derived from the script path ($0).
my $CACHE_YQL_RES_PATH = $0 . '.cache.json';

# Maximum age of the cache file (seconds) for it to be reused with --use_cache.
my $CACHE_TTL = 10 * 60;

# Number of pages written to the database per add_multi call.
my $CHUNK_SIZE = 1_000;

# Login => user id; the id is stored as creator_id and owner_id of every added page.
my $USER_IDS = {'yndx-robot-di-rsya' => 1_572_290_502};

# YT cluster the YQL query runs on and the table is read from.
my $YT_CLUSTER = 'hahn';

# sprintf template of the temporary table path; prepare_args() fills %s with the
# ticket, or replaces the whole value with --yt_table.
my $YT_TMP_TABLE_PATH = '//home/partner/%s/distribution_pages_from_edit_page';

# Script entry point: collect the Distribution pages and, unless the dry_run
# option is set, write them to the database.
run(
    sub {
        my ($app, $opts) = @_;

        my $pages = get_pages($app, $opts);

        # With dry_run the pages are only fetched and counted, nothing is written.
        $opts->{'dry_run'} or add_pages($app, $pages, $opts);
    }
);

# Returns an array ref with the Distribution page rows to be added.
#
# The rows are taken from the local cache file when get_cached_data() returns
# something and --reset_cache is not set. Otherwise the YT table is (re)built by
# the YQL query (skipped when --yt_table points to a ready table), read, and the
# cache file is refreshed.
#
# Always returns an array ref; it is empty when nothing could be read.
sub get_pages {
    my ($app, $opts) = @_;

    my $data = get_cached_data($CACHE_YQL_RES_PATH, $opts, $CACHE_TTL);
    if (!$data || $opts->{reset_cache}) {
        save_yql_to_tmp_table($app, $opts) unless $opts->{yt_table};
        $data = read_yt_table($app);
        update_cache_data($CACHE_YQL_RES_PATH, $data) if $data;
    }

    # An empty read must not die on the dereference below.
    $data ||= [];

    # Round up: a trailing partial chunk is a step too.
    my $total = scalar(@$data);
    my $steps = int(($total + $CHUNK_SIZE - 1) / $CHUNK_SIZE);

    print logstr sprintf('Distribution pages found %d (%d steps)', $total, $steps);

    return $data;
}

# Filters the rows and writes them to the database in chunks of $CHUNK_SIZE.
#
# Options read from $opts:
#   page_ids    - comma-separated list; only these PageIDs are processed
#   min_page_id - only PageIDs strictly greater than this value are processed
#   max_page_id - only PageIDs strictly less than this value are processed
#   limit       - stop after this many pages have been selected
#
# Returns 1.
sub add_pages {
    my ($app, $data, $opts) = @_;

    my $start_time = time();

    my $only_pages;
    if ($opts->{page_ids}) {
        $only_pages = {map {$_ => 1} split(/,/, $opts->{page_ids})};
    }

    # Number of chunks already sent; add_pages_chunk() logs it as "Step N + 1".
    my $chunks_count = 0;
    my $count        = 0;
    my @pages_chunk  = ();
    foreach my $row (@$data) {
        next if defined $only_pages  && !$only_pages->{$row->{PageID}};
        next if $opts->{min_page_id} && $row->{PageID} <= $opts->{min_page_id};
        next if $opts->{max_page_id} && $row->{PageID} >= $opts->{max_page_id};

        $count++;

        push @pages_chunk, $row;

        if (@pages_chunk >= $CHUNK_SIZE) {
            add_pages_chunk($app, \@pages_chunk, $chunks_count, $count, scalar(@$data));

            # Increment only after the chunk is sent, so that full chunks and the
            # trailing partial chunk are numbered consistently (1, 2, 3, ...).
            $chunks_count++;
            @pages_chunk = ();
        }

        last if $opts->{limit} && $count >= $opts->{limit};
    }

    # Remainder that did not fill a whole chunk.
    add_pages_chunk($app, \@pages_chunk, $chunks_count, $count, scalar(@$data)) if @pages_chunk;

    print logstr sprintf('Distribution pages added %d (takes %d min)', $count, int((time() - $start_time) / 60));

    return 1;
}

# Cleans up the bk_data of every page in the chunk, writes the chunk to
# distribution_campaign inside one transaction and logs the progress.
#
#   $pages_chunk  - array ref of rows (each with PageID and bk_data)
#   $chunks_count - value logged as "Step $chunks_count + 1"
#   $count        - number of pages selected so far (used for the "left" figure)
#   $total_pages  - total number of rows in the source data
#
# Note: bk_data of the passed rows is modified in place. Returns 1.
sub add_pages_chunk {
    my ($app, $pages_chunk, $chunks_count, $count, $total_pages) = @_;

    my $started_at = time();
    my $robot_id   = $USER_IDS->{'yndx-robot-di-rsya'};

    my @bulk;
    for my $page (@$pages_chunk) {
        # A missing bk_data becomes an empty hash, exactly as in-place
        # dereferencing would autovivify it.
        $page->{bk_data} = {} unless defined $page->{bk_data};
        my $bk = $page->{bk_data};

        # YT holds an empty string "" where an object {} is expected
        $bk->{Places} = {} if exists $bk->{Places} && !$bk->{Places};

        # Drop the fields that are added automatically when a page is sent from PI
        delete @{$bk}{qw( Login PageID PageCaption ProductID CreateDate UpdateTimePI PIEditTime IsRegularUpdate)};

        # Drop the defaults
        delete $bk->{TargetType} if $bk->{TargetType} && $bk->{TargetType} == 8;
        for my $blocks_key (qw( DirectBlocks RtbBlocks )) {
            delete $bk->{$blocks_key} if $bk->{$blocks_key} && !%{$bk->{$blocks_key}};
        }
        delete $bk->{Slots} if $bk->{Slots} && !@{$bk->{Slots}};

        my $page_id = $page->{PageID};
        push @bulk,
          {
            id         => $page_id,
            page_id    => $page_id,
            multistate => 0,
            caption    => sprintf('Distribution page %d', $page_id),
            creator_id => $robot_id,
            owner_id   => $robot_id,
            patch      => to_json($bk, pretty => 1, canonical => 1),
          };
    }

    # The whole chunk goes in one transaction.
    # NOTE(review): duplicate_update presumably updates rows that already exist - confirm.
    $app->partner_db->transaction(
        sub {
            $app->partner_db->distribution_campaign->add_multi(\@bulk, duplicate_update => TRUE);
        }
    );

    # Never report a negative remainder.
    my $pages_left = $total_pages - $count;
    $pages_left = 0 unless $pages_left > 0;

    my $added_ids = join(',', map {$_->{page_id}} @bulk);

    print logstr sprintf(
        'Step %d. Added %d pages (takes %d sec, %d left): %s',
        $chunks_count + 1,
        scalar(@bulk), time() - $started_at,
        $pages_left, $added_ids
    );

    return 1;
}

# Runs a YQL query that (re)creates the temporary YT table with the pages from
# //home/yabs/dict/EditPage whose TargetType is 8 and whose update time (Moscow
# time) is not earlier than --update_time. Without --update_time all such pages
# are selected.
#
# Returns 1.
sub save_yql_to_tmp_table {
    my ($app, $opts) = @_;

    my $start_time = time();
    print logstr 'Start saving YQL to tmp table (estimation ~5min)';

    my $update_time = $opts->{update_time} || '1970-01-01 00:00:00';

    # The query compares strings formatted as "%Y-%m-%d %H:%M:%S". Accept the
    # time part written with dashes ("HH-MM-SS") as well: left as is, it would
    # make the comparison ignore minutes and seconds, because ':' sorts after '-'.
    $update_time =~ s/\A(\d{4}-\d{2}-\d{2}) (\d{2})-(\d{2})-(\d{2})\z/$1 $2:$3:$4/;

    # NOTE(review): the table path and update_time are interpolated into the
    # query text unescaped; both come from the command line of the operator.
    my $query_yql = sprintf q[
        INSERT INTO `%s` WITH TRUNCATE
        SELECT    PageID,
                  DateTime::Format("%%Y-%%m-%%d %%H:%%M:%%S")(AddTimezone(DateTime::FromSeconds( Cast(UpdateTime AS UInt32) ), 'Europe/Moscow')) as UpdateTime,
                  Data
        FROM      `//home/yabs/dict/EditPage`
        WHERE     CAST(JSON_VALUE(CAST(Data as Json), "$.TargetType") as Int8) = 8
                  AND DateTime::Format("%%Y-%%m-%%d %%H:%%M:%%S")(AddTimezone(DateTime::FromSeconds( Cast(UpdateTime AS UInt32) ), 'Europe/Moscow')) >= '%s'
        ORDER BY  PageID
    ], $YT_TMP_TABLE_PATH, $update_time;

    WARN $query_yql;

    # The query writes straight into the table, so no result is requested back.
    $app->api_yql->yql_start_operation_and_get_result(
        clusters     => [$YT_CLUSTER],
        start_params => {params => {content => $query_yql,}},
        get_params   => {
            format        => 'json',
            sleep_seconds => 30,
            want_result   => 0,
        }
    );

    print logstr sprintf('End of saving YQL to tmp table (takes %s sec)', (time() - $start_time));

    return 1;
}

# Downloads the YT table into the cache file, parses it as JSON lines and, for
# every row, replaces the JSON string in "Data" with the decoded structure
# stored under "bk_data".
#
# Returns what from_jsonl() produced (used as an array ref of rows).
sub read_yt_table {
    my ($app) = @_;

    my $started_at = time();
    print logstr "Start reading YT table '$YT_TMP_TABLE_PATH' (estimation ~10 sec)";

    # Single attempt; the response body is written straight to the cache file.
    my %request_params = (
        timeout         => 300,
        attempts        => 1,
        delay           => 0,
        timeout_retry   => 0,
        ':content_file' => $CACHE_YQL_RES_PATH,
    );

    $app->api_yt->read_table(
        host    => $YT_CLUSTER,
        path    => $YT_TMP_TABLE_PATH,
        headers => {'X-YT-Parameters' => '{output_format=<encode_utf8=%false>json}'},
        params  => \%request_params,
    );

    my $rows = from_jsonl(readfile($CACHE_YQL_RES_PATH));

    for my $page (@$rows) {
        my $raw_json = delete $page->{Data};
        $page->{bk_data} = from_json($raw_json);
    }

    print logstr sprintf('End reading YT table (takes %s sec)', time() - $started_at);

    return $rows;
}

# Serialises $data as pretty-printed JSON into the cache file and logs the path.
sub update_cache_data {
    my ($cache_file_path, $data) = @_;

    my $json = to_json($data, pretty => TRUE);
    writefile($cache_file_path, $json);

    WARNF 'cache file updated %s', $cache_file_path;
}

# Returns the rows stored in the cache file, or undef when the cache must not
# or cannot be used.
#
#   $cache_file_path - path of the JSON cache file
#   $args            - script options; use_cache / force_cache are read
#   $ttl_sec         - maximum age of the file (seconds) accepted with use_cache
#
# The file is read only when use_cache or force_cache is set and the file
# exists. With force_cache its age is ignored. An empty list counts as no data.
sub get_cached_data {
    my ($cache_file_path, $args, $ttl_sec) = @_;

    my $is_use_cache = $args->{use_cache} || $args->{force_cache};

    my $data;
    if ($is_use_cache && -e $cache_file_path) {
        my $cache_timestamp = File::stat::stat($cache_file_path)->mtime;
        my $cache_age_sec   = time() - $cache_timestamp;

        if ($args->{force_cache} || $cache_age_sec < $ttl_sec) {
            $data = from_json(readfile($cache_file_path));
        }
    }

    $data = undef unless ($data && @$data);

    # Report the file that was actually read, not the file-level default path.
    WARN 'Got data from cache file ' . $cache_file_path if $data;

    return $data;
}

# Resolves the YT table path: --yt_table is used as is, otherwise the ticket is
# substituted into the path template.
# NOTE(review): presumably called once by the script wrapper after option
# parsing - a second call would re-run sprintf on an already resolved path.
sub prepare_args {
    my ($opts) = @_;

    $YT_TMP_TABLE_PATH = $opts->{yt_table} || sprintf($YT_TMP_TABLE_PATH, $opts->{ticket});
}

# Returns the list of option specifications (spec => reference to the slot in
# $opts) for the script-specific command line options:
#   value optional (":s") - page_ids, min_page_id, max_page_id, limit, update_time
#   value required ("=s") - yt_table
#   negatable flag ("!")  - use_cache, force_cache, reset_cache
sub args {
    my ($opts) = @_;

    my @spec;

    for my $name (qw( page_ids min_page_id max_page_id limit update_time )) {
        push @spec, $name . ':s', \$opts->{$name};
    }

    push @spec, 'yt_table=s', \$opts->{yt_table};

    for my $name (qw( use_cache force_cache reset_cache )) {
        push @spec, $name . '!', \$opts->{$name};
    }

    return @spec;
}
