#!/usr/bin/perl -w
use strict;

use utf8;
use open ':utf8';

use FindBin;
use lib "$FindBin::Bin/../lib";

use Project;

use Utils::Common;
use Utils::Sys;

# Startup: take the file lock, install error handling, and build the YT client.

# Exit with status 0 when the file lock is not obtained.
exit(0) unless Utils::Sys::get_file_lock();
Utils::Sys::handle_errors();

my $proj = Project->new({});

# Retry settings passed to the YT client below.
my ($tries, $sleep_between_tries) = (5, 120);

# Kept as an ordered list (not a hash) so the parameters reach set_params
# in exactly this order.
my @yt_client_params = (
    pool                => 'catalogia',
    tries               => $tries,
    sleep_between_tries => $sleep_between_tries,
);

my $yt_client = $proj->yt_client->set_params(@yt_client_params);

# Pipeline: shuffle the source banners table, run the categorization mapper
# over the shuffled copy, sort the mapper output by bid into the destination
# table, stamp the destination with source/upload times, then clean up.

# Table locations.
my $src_table = '//home/direct/export/bm/bm_banners';
# Scratch table; the name embeds the current process id ($$).
my $shuffled_src_table = "//tmp/bm_banners_shuffled_$$";
my $unsorted_dst_table = '//home/catalogia/tmp/banners-recategorized-unsorted';
my $dst_table = '//home/catalogia/banners-recategorized-fast';

# Sizes placed into the map operation spec, expressed in bytes.
my $bytes_per_megabyte = 1 << 20;
my $bytes_per_gigabyte = 1 << 30;
my $tmpfs_size = 8 * $bytes_per_gigabyte;
my $memory_limit = 8 * $bytes_per_gigabyte;
my $data_size_per_job = 100 * $bytes_per_megabyte;

# Read the source table's @upload_time attribute up front; it is written to
# the destination table's @sources_modification_time attribute further down.
chomp(my $modification_time = $yt_client->read_cmd('get', "$src_table/\@upload_time", '--format=dsv'));

# Step 1: write a shuffled copy of the source into the scratch table.
$yt_client->shuffle($src_table, $shuffled_src_table);

# Step 2: map the shuffled copy through categorize_banners_yt.pl (shipped to
# the operation with --local-file) and write the unsorted result.
# The arguments carry their own single quotes, so they are presumably passed
# through a shell -- TODO confirm against do_project_cmd.
$yt_client->do_project_cmd(
    'map',
    "'./mr_perl ./categorize_banners_yt.pl'",
    "--local-file=$Utils::Common::options->{'dirs'}{'scripts'}/banners_categories/categorize_banners_yt.pl",
    "--src=$shuffled_src_table",
    "--dst=$unsorted_dst_table",
    "'--input-format=$Utils::Common::options->{'DirectClient'}{'yt_banners_format'}'",
    "'--output-format=<enable_string_to_all_conversion=true;columns=[bid;cid;pid;BannerID;lang;Categories;Flags;CategoryIDs;Mediagroups;UpdateTime;minicategs_ids;]>schemaful_dsv'",
    qq/'--spec={"mapper"={"tmpfs_path"="."; "copy_files"=true; "tmpfs_size"=$tmpfs_size; "memory_limit"=$memory_limit;};"data_size_per_job"=$data_size_per_job;}'/,
);

# Step 3: sort the mapper output by bid into the final destination table.
$yt_client->do_cmd('sort', "--src=$unsorted_dst_table", "--dst=$dst_table", '--sort-by=bid');

# Step 4: record which source snapshot the destination was built from.
# NOTE(review): JSON is not loaded by a `use` line in this file; presumably
# it is pulled in by Project or Utils::* -- confirm.
my $value = JSON::to_json({$src_table => $modification_time});
$yt_client->do_cmd(
    'set',
    "$dst_table/\@sources_modification_time",
    '--format=json',
    "--value='$value'",
);

$yt_client->set_upload_time($dst_table);

# Step 5: drop the scratch table. Without this, every run leaves a new
# PID-named table behind in //tmp. Done last so that a cleanup failure cannot
# prevent the results above from being published; --force makes the removal
# a no-op if the table is already gone.
$yt_client->do_cmd('remove', $shuffled_src_table, '--force');

Utils::Sys::release_file_lock();
