#!/usr/bin/perl -w
use strict;

use utf8;
use open ':utf8';

use FindBin;
use lib "$FindBin::Bin/../../lib";

use Project;

use Utils::Common;
use Utils::Sys;

# Project context; it provides the YT client that is configured just below.
my $project = Project->new({});

# Retry settings handed to the YT client as "tries" / "sleep_between_tries".
my $max_tries   = 5;
my $retry_sleep = 120;    # NOTE(review): presumably seconds -- confirm against the client

my $yt_client = $project->yt_client->set_params(
    pool                => 'bannerland',
    tries               => $max_tries,
    sleep_between_tries => $retry_sleep,
);

# Input table and output table of the map operation.
my ($src_table, $dst_table) = (
    '//home/catalogia/dse_queries_rysmyatova',
    '//home/bannerland/tmp/dse_filtered_queries_rysm',
);

# Attribute prefix for the destination path: type conversion on, plus the
# output schema as an ordered list of [column name, column type] pairs.
my @dst_columns = (
    [ exact_hits => 'int64'  ],
    [ hits       => 'int64'  ],
    [ norm       => 'string' ],
    [ url        => 'string' ],
);
my $schema = '<enable_type_conversion=true;schema=['
    . join(';', map { sprintf '{name = %s; type = %s}', @{$_} } @dst_columns)
    . ']>';

# Size units, in bytes.
my $gib = 1 << 30;
my $mib = 1 << 20;

# Per-job resource settings.
my $memory_limit      = 8 * $gib;
my $tmpfs_size        = 5 * $gib;
my $data_size_per_job = 100 * $mib;

# Memory reserve factors, expressed as fractions of $memory_limit.
# Lower bound: memory that every job is expected to need
#   (original estimate: ~4G tmpfs + 2G process out of 8G, plus headroom).
# Probable:    memory that is enough for most jobs
#   (original estimate: ~4.5G tmpfs + 2.5G process out of 8G, plus headroom).
# NOTE(review): those estimates work out to 0.75 and 0.875, while the values
# below are 0.9 and 0.95 and $tmpfs_size is 5G -- confirm which is intended.
my $reserve_lower_bound = 0.9;
my $reserve_probable    = 0.95;

my $memory_subspec = join ' ',
    qq/"memory_limit"=$memory_limit;/,
    qq/"memory_reserve_factor"=$reserve_probable;/,
    qq/"user_job_memory_digest_lower_bound"=$reserve_lower_bound;/;

# tmpfs is mounted at the job's working directory ("."). No copy option is
# set: the untarred files get removed, so an extra copy is not needed.
my $tmpfs_subspec = join ' ',
    '"tmpfs_path"=".";',
    qq/"tmpfs_size"=$tmpfs_size;/;

# Run the YT "map" operation: filter_searchcount_yt.pl (started through
# ./mr_perl) reads $src_table as dsv and writes json rows to $dst_table,
# whose path is prefixed with the schema attributes.
my $scripts_dir = $Utils::Common::options->{'dirs'}{'scripts'};

# Body of the operation spec: mapper resources plus the per-job input size.
my $spec_body = join ' ',
    sprintf('"mapper"={%s %s};', $tmpfs_subspec, $memory_subspec),
    sprintf('"data_size_per_job"=%s;', $data_size_per_job);

# The spec is deliberately kept as three separate list elements (opening,
# body, closing); how do_project_cmd assembles its arguments is not visible
# in this script.
my @map_cmd = (
    'map',
    "'./mr_perl ./filter_searchcount_yt.pl'",
    "--local-file=$scripts_dir/users/apovetkin/filter_searchcount_yt.pl",
    '--src=' . $src_table,
    "'--dst=" . $schema . $dst_table . "'",
    "'--input-format=dsv'",
    "'--output-format=json'",
    "'--spec={",
    $spec_body,
    "}'",
);

$yt_client->do_project_cmd(@map_cmd);


