#!/usr/bin/perl -w
use strict;

use utf8;
use open ':utf8';

use FindBin;
use lib "$FindBin::Bin/../../lib";

use Project;

use Utils::Common;
use Utils::Sys;
use Getopt::Long;

# Single-instance guard: when the file lock cannot be obtained, report it
# and finish with exit status 0 without doing any work.
unless (Utils::Sys::get_file_lock()) {
    Utils::Sys::print_err("found already running script, do exit");
    exit(0);
}

# NOTE(review): presumably installs the project-wide die/warn handlers --
# confirm in Utils::Sys.
Utils::Sys::handle_errors();

# Do the actual work, then give the lock back on the normal completion path.
main();
Utils::Sys::release_file_lock();
exit(0);

# Launches a YT 'map' operation that runs categorize_phrases_from_advq_yt.pl
# over a source table found two levels below //home/advq/broadmatch/rus and
# writes the result to the advq-phrases-categorized table.
#
# Input rows are read as schemaful_dsv with columns OrigSanitized and Hits;
# output rows carry phrase, hits and mctgs, matching the schema attached to
# the destination path.
#
# Takes no arguments. Dies if either level of the source directory listing
# yields no usable first entry, so the operation is never started against a
# malformed source path.
sub main {

    my $src_table = "//home/advq/broadmatch/rus";
    my $dst_table = "//home/catalogia/advq-phrases/advq-phrases-categorized";

    my $proj = Project->new({});
    my $tries = 5;
    # NOTE(review): presumably seconds -- confirm against the yt_client implementation.
    my $sleep_between_tries = 120;

    my $yt_client = $proj->yt_client->set_params(
        pool                => 'catalogia',
        tries               => $tries,
        sleep_between_tries => $sleep_between_tries,
    );

    # Returns the path of the first entry listed under $path, or dies when the
    # listing is missing or empty. Without this check an undef would be
    # concatenated into the path and the map would run on a broken --src.
    my $first_child_of = sub {
        my ($path) = @_;

        my $children = $yt_client->list($path);
        my $first = defined $children ? $children->[0] : undef;
        die "no entries found under '$path', cannot resolve source table"
            unless defined $first && length $first;

        return "$path/$first";
    };

    # The source table lives two directory levels below the root; take the
    # first listed entry at each level.
    # NOTE(review): relies on the order in which list() returns entries -- confirm
    # that the first entry is the intended one.
    $src_table = $first_child_of->($src_table);
    $src_table = $first_child_of->($src_table);

    my $schema = '<schema=[{name = phrase; type = string}; {name = hits; type = int64}; {name = mctgs; type = string}]>';

    my $bytes_per_gigabyte = 1 << 30;
    my $tmpfs_size = 8 * $bytes_per_gigabyte;      # 8 GiB
    my $memory_limit = 9 * $bytes_per_gigabyte;    # 9 GiB, i.e. 1 GiB above the tmpfs size
    my $data_size_per_job = 20*(1<<20);            # 20 MiB

    $yt_client->do_project_cmd(
        'map',
        "'./mr_perl ./categorize_phrases_from_advq_yt.pl'",
        "--local-file=$Utils::Common::options->{'dirs'}{'scripts'}/catalogia/categorize-advq-request-yt/categorize_phrases_from_advq_yt.pl",
        "--src=$src_table",
        "'--dst=$schema$dst_table'",
        "'--input-format=<enable_escaping=false;columns=[OrigSanitized;Hits]>schemaful_dsv'",
        "'--output-format=<enable_escaping=false;enable_string_to_all_conversion=true;columns=[phrase;hits;mctgs]>schemaful_dsv'",
        qq/'--spec={"mapper"={"tmpfs_path"="."; "copy_files"=true; "tmpfs_size"=$tmpfs_size; "memory_limit"=$memory_limit;};"data_size_per_job"=$data_size_per_job;}'/,
    );
}
