#!/usr/bin/perl -w
use strict;
use utf8;

use Getopt::Long;

use FindBin;
use lib "$FindBin::Bin/lib";
use File::Copy;

use YtProjectLaunchHelper;

# Parsed command-line options, keyed by long option name.
my %opt;

# "bundling" allows single-letter flags to be combined (e.g. -cf).
Getopt::Long::Configure ("bundling");

# Option meanings are described in the --help text further down.
# GetOptions returns false when it meets an unknown or malformed option
# (it has already printed the details to STDERR), so stop here instead of
# continuing with a partially parsed option set.
GetOptions(\%opt,
    'src=s',
    'dst=s',
    'spec=s',
    'text=s',
    'title=s',
    'body=s',
    'url=s',
    'phrases=s',
    'help|h',
    'categories|c',
    'flags|f',
    'catids|C',
    'directcatids|d',
    'directids|D',
    'mediagroups|m',
    'mediaids|M',
    'bmapi=s',
    'genconf:s',
) or die "ERROR: invalid command line options (use --help for usage)\n";
# --help: print the usage text and exit successfully before any validation
# or YT work is attempted.
if ( $opt{help} ) {
    print <<'END_INFO';
Options:
    YT options:
        --src        source table
        --dst        destination table (optional, same as source by default)
        --spec       override default spec (not recommended)
    Field options:
        Which fields from source table to use for categorization. At least one of the following options is required. Options from more than one group are not allowed.
        Text group:
            Categorization of single text field without special logic for banners
                --text   text field in source table
        Banner group:
            Categorization with special logic for banners
                --title
                --body
                --url
                --phrases
    Output options:
        (optional, all fields except bmapi are calculated by default. If any is specified, only specified fields are calculated)
        -c, --categories    Categories
        -f, --flags         Flags
        -C, --catids        Internal categories IDs
        -D, --directids     Direct IDs for categories and videogroups
        -d, --directcatids  Direct IDs for categories only
        -m, --mediagroups   Mediagroups
        -M, --mediaids      Mediagroups IDs
        --bmapi             comma-separated list of bmapi methods (works only for text)
    Other options:
        --genconf           do not run anything, just generate config for job. Filename is optional, default is standard filename in current dir

Examples:
    standalone_yt_categorize.pl --src=//home/direct/db/banners --title=title --body=body
    standalone_yt_categorize.pl --src=//home/blablabla/table-with-texts --text=text_field

Important notes:
    Environmental variables for YT must be set:
        YT_PROXY
        YT_TOKEN_PATH
        YT_POOL
END_INFO
    exit 0;
}

# Categorization methods and the field options that select each of them.
my %methods = (
    'text' => ['text'],
    'banner' => ['title', 'body', 'url', 'phrases'],
);

# Output fields that can be requested individually via the output flags.
my @output=qw(categories flags catids mediagroups mediaids directids directcatids);

# A method counts as chosen when at least one of its field options was
# present on the command line.
my @chosen_methods = grep {
    my $method = $_;
    grep { exists $opt{$_} } @{ $methods{$method} };
} keys %methods;

# Exactly one group must be used: zero means no input field was given,
# two means fields from both groups were mixed.
if ( @chosen_methods != 1 ) {
    # Report on STDERR like every other error in this script; the exit
    # code stays 1.
    print STDERR "ERROR: invalid field list\n";
    exit(1);
}

my $chosen_method = $chosen_methods[0];

# bmapi methods are only supported together with the single-text method.
if ($chosen_method ne 'text' and $opt{bmapi}) {
    die "ERROR: bmapi methods only work for text\n";
}

# Project helper used below to save the job config and to supply extra
# console parameters for `yt`; its temp_dir is the directory of this script.
my $helper = YtProjectLaunchHelper->new({
    temp_dir => "$FindBin::Bin",
});

# Job config: {input} maps each field option of the chosen method to the
# value given on the command line, {output} lists the fields to calculate,
# {bmapi} lists the requested bmapi methods.
my $job_cfg;
for my $field ( @{$methods{$chosen_method}} ) {
    $job_cfg->{input}{$field} = $opt{$field} if $opt{$field};
}

# All output fields are calculated only when neither an output flag nor
# --bmapi was given; otherwise just the explicitly requested ones.
my $explicit_outputs = grep { $opt{$_} } @output;
my $default_output = ( $explicit_outputs || $opt{bmapi} ) ? 0 : 1;
for my $field ( @output ) {
    push @{$job_cfg->{output}}, $field if $opt{$field} || $default_output;
}

if ( $opt{bmapi} ) {
    push @{$job_cfg->{bmapi}}, $_ foreach split /,/, $opt{bmapi};
}

my $cfg_file = $helper->save_job_cfg_host($job_cfg);

# --genconf: only deliver the generated config (to the given path, or to the
# current directory when no value was supplied) and stop without running YT.
if ( defined $opt{genconf} ) {
    my $dst =  $opt{genconf} || "./";
    # File::Copy::copy returns false on failure; do not report success
    # when the config was not actually written.
    copy($cfg_file, $dst)
        or die "ERROR: cannot copy job config '$cfg_file' to '$dst': $!\n";
    exit(0);
}

# A source table is mandatory for an actual run.
unless ( $opt{src} ) {
    die "ERROR: source table not specified\n";
}

my $src = $opt{src};

# The destination defaults to the source table.
my $dst = $opt{dst} || $src;

# Resource settings for the default spec: 8 GiB tmpfs, 8 GiB memory limit,
# 10 MiB of input data per job.
my $bytes_per_gigabyte = 1 << 30;
my $bytes_per_megabyte = 1 << 20;
my $tmpfs_size = 8 * $bytes_per_gigabyte;
my $memory_limit = 8 * $bytes_per_gigabyte;
my $data_size_per_job = 10 * $bytes_per_megabyte;

# --spec replaces the default spec entirely.
my $spec = $opt{spec} || qq/{"mapper"={"tmpfs_path"="."; "copy_files"=true; "tmpfs_size"=$tmpfs_size; "memory_limit"=$memory_limit;};"data_size_per_job"=$data_size_per_job;}/;

# The command is run through the shell as a single string, so every argument
# is wrapped in single quotes (the spec itself contains double quotes).
# NOTE(review): a single quote inside --src/--dst/--spec would break this
# quoting; values are assumed to come from a trusted operator.
my @cmd = (
    'yt',
    'map',
    "'./mr_perl scripts/standalone_yt_categorize_job.pl'",
    "'--src=$src'",
    "'--dst=$dst'",
    "'--input-format=dsv'",
    "'--output-format=dsv'",
    qq/'--spec=$spec'/,
);

push @cmd, $helper->additional_console_params;
my $yt_cmd = join(' ', @cmd);

print "executing cmd: $yt_cmd\n";

# system() returns the wait status: 0 on success, -1 if the command could not
# be started (reason in $!), otherwise the signal number is in the low 7 bits
# and the exit code in the high byte.
my $status = system( $yt_cmd );
if ( $status == -1 ) {
    die "ERROR: YT runner error: failed to execute command: $!\n";
}
elsif ( $status & 127 ) {
    die sprintf( "ERROR: YT runner error: command died with signal %d\n", $status & 127 );
}
elsif ( $status != 0 ) {
    die sprintf( "ERROR: YT runner error: command exited with code %d\n", $status >> 8 );
}
exit(0);
