#!/usr/bin/perl

use strict;
use warnings;
use autodie;

use open ':utf8';

BEGIN { $ENV{BM_NO_PATCH} = "debug_env" }

use FindBin;
use lib "$FindBin::Bin/../../lib";

use Data::Dumper;
use List::MoreUtils qw(uniq);
use Encode qw(encode_utf8 decode_utf8);
use JSON::XS;
use List::Util qw(minstr);
use File::Path qw(make_path);

use Project;
use Utils::Sys qw(handle_errors load_json do_safely);
use Utils::Hosts qw(get_hosts);


# Script driver: pick tasks that extend coverage and save their taskjson
# files locally, one file per selected task.
handle_errors(DIE => {stack_trace => 1});

my $proj = Project->new();

# Examine at most 300 candidate 'dyn' tasks.
my $result = find_interesting_tasks($proj, 'dyn', 300);
$proj->dd($result->{seen});

my $dir = 'find_tasks_coverage/run00';
make_path($dir);
foreach my $task (@{ $result->{task_data} }) {
    my $task_id = $task->{task_id};

    # One summary line per selected task: id, domain dir, and what it added.
    my $summary = "task $task_id: $task->{info}{domain_dir_name}: "
        . encode_json($task->{add}) . "\n";
    print STDOUT $summary;

    # autodie is in effect, so open/close failures abort the script.
    open my $out, '>', "$dir/$task_id.taskjson";
    print {$out} $task->{file}{taskjson};
    close $out;
}

$proj->log('all done!');

exit(0);


# Loads the database row of a task and fetches its artefact files from the
# host the row points to.
#
# Arguments (all mandatory, the sub dies when one is undefined):
#   $proj      - project object; must provide dbtable() and
#                read_sys_cmd_bash_remote()
#   $task_type - 'dyn' selects the DynTasks table and the taskjson/task.err
#                files; any other value selects PerfTasks and the
#                taskjson/expt_send files
#   $task_id   - TaskID of the task
#
# Returns a hashref { task_id, host, info, file }, where `file` maps each file
# postfix to the file content.  Returns nothing when the task row or its host
# is missing, or when any of the required files could not be read.
sub get_task_data {
    my $proj = shift // die;
    my $task_type = shift // die;
    my $task_id = shift // die;

    my $table = ($task_type eq 'dyn') ? "DynTasks" : "PerfTasks";
    my $dbt = $proj->dbtable($table, 'TaskID', 'bannerland_dbh');
    my $task_info = $dbt->Get($task_id);

    # Without a row (or a host in it) there is nowhere to look for files;
    # bail out instead of running remote commands against an undefined host.
    return if !$task_info;
    my $host = $task_info->{host};
    return if !defined $host;

    # Fetch every file required for this task type.
    my %file;
    my $file_types = ($task_type eq 'dyn') ? [ 'taskjson', 'task.err' ] : [ 'taskjson', 'expt_send' ];
    my $dmn_dir = $task_info->{domain_dir_name};
    for my $postfix (@$file_types) {
        # NOTE(review): the options presumably make do_safely return false on
        # error or timeout instead of dying - confirm in Utils::Sys.
        my $res = do_safely(sub {
            # Of all files of this task with the wanted postfix take the one
            # whose path sorts last.
            my $fname = $proj->read_sys_cmd_bash_remote(
                $host, "find /opt/broadmatching/temp/${task_type}_banners/$dmn_dir | grep '/$task_id' | grep -P '$postfix\$' | sort | tail -n1"
            );
            return if !$fname;
            chomp $fname;
            # A bare newline would otherwise turn into an argument-less `cat`.
            return if !length $fname;
            my $data = $proj->read_sys_cmd_bash_remote($host, "cat $fname");
            return if !$data;
            $file{$postfix} = $data;
            return 1;
        }, timeout => 20, no_die => 1, die_if_timeout => 0);

        # All files are required: one missing file makes the task unusable.
        return if !$res;
    }

    return { task_id => $task_id, host => $host, info => $task_info, file => \%file };
}

# Returns the list of distinct ad types declared by the product classes.
# Does not depend on the task type.
#
# Every file matching lib/BM/BannersMaker/Product*.pm under the directory
# $proj->options->{scripts} is loaded as class BM::BannersMaker::<basename>;
# classes that implement ad_type() contribute its return value.
# Dies when one of the modules fails to load.
sub get_adv_types_list {
    my $proj = shift;

    my $pattern = $proj->options->{scripts} . "/lib/BM/BannersMaker/Product*.pm";

    my @adv_types;
    for my $path (glob $pattern) {
        # Class name comes from the file's basename; skip anything that does
        # not look like a module file rather than reusing a stale capture.
        my ($basename) = $path =~ m@/([^/]+)\.pm\z@ or next;
        my $classname = "BM::BannersMaker::$basename";

        # require by file name: same effect as `require Class::Name`, but
        # without a string eval; it dies by itself when loading fails.
        (my $module_file = "$classname.pm") =~ s{::}{/}g;
        require $module_file;

        next unless $classname->can("ad_type");
        push @adv_types, $classname->ad_type();
    }

    return uniq @adv_types;
}

# Returns the distinct target_funnel values found among the entries of
# Resource->TargetsParams in the structure returned by $task->taskinf.
sub get_target_funnels {
    my ($task) = @_;

    my $params_by_target = $task->taskinf->{Resource}{TargetsParams};

    my @funnels;
    foreach my $params (values %$params_by_target) {
        push @funnels, $params->{target_funnel};
    }

    return uniq @funnels;
}

# Extracts the distinct adv_type values from a tab-separated dump.
#
# $tsv is a multi-line string; the fourth column (index 3) of each line is
# expected to hold a JSON object.  Lines without that column, with
# unparsable JSON, with JSON that is not an object, or without an
# "adv_type" key are skipped.  An undefined $tsv yields an empty list.
#
# Returns the list of distinct adv_type values (in no particular order).
sub get_perf_adv_types {
    my $info_column = 3;
    my $tsv = shift;
    my %adv_types;
    for my $line (split m/\n/, $tsv // '') {
        my @cols = split m/\t/, $line;
        my $offer_info_json = $cols[$info_column] or next;
        # decode_json wants bytes, the text is re-encoded first; broken JSON
        # is deliberately ignored.
        my $offer_info = eval { decode_json(encode_utf8($offer_info_json)) };
        # Valid JSON may still be an array or scalar; dereferencing that as a
        # hash would kill the whole run.
        next unless ref $offer_info eq 'HASH';
        my $adv_type = $offer_info->{"adv_type"};
        next unless defined $adv_type;
        $adv_types{$adv_type}++;
    }
    return keys %adv_types;
}

# Finds the first line of $log that contains "seen_ad_type:" and returns the
# comma-separated values following it as a list.  Returns an empty list when
# no line carries the marker.
sub get_dyn_adv_types {
    my ($log) = @_;

    my $marker = qr/seen_ad_type:\s*(.*)$/;

    foreach my $row (split /\n/, $log) {
        # List assignment in scalar context is 0 when the match fails.
        my ($csv) = $row =~ $marker or next;
        return split /,/, $csv;
    }

    return ();
}

# Summarises which coverage entities a fetched task exercises.
#
# Arguments:
#   $proj      - project object (not used here)
#   $task_type - 'perf' or anything else (treated as 'dyn')
#   $task_data - hashref as built by get_task_data (keys `file` and `info`)
#
# Returns a hashref of array refs:
#   perf: { target_funnels => [...], adv_types => [...] }
#   dyn:  { adv_types => [...], src_types => [ 'feedurl'|'specurl'|'domain' ] }
# Dies when the perf taskjson is not valid JSON.
sub analyze_task_data {
    my $proj = shift;
    my $task_type = shift;
    my $task_data = shift;

    my %seen;
    if ($task_type eq 'perf') {
        # The entries must be funnel NAMES: they are used as hash keys by the
        # caller.  Storing the decoded structure itself would produce a
        # "HASH(0x...)" key that always looks new.
        # NOTE(review): assumes the taskjson file has the layout that
        # get_target_funnels reads from taskinf
        # (Resource -> TargetsParams -> * -> target_funnel) - confirm.
        my $taskinf = decode_json(encode_utf8($task_data->{file}{taskjson}));
        my $resource = ref $taskinf eq 'HASH' ? $taskinf->{Resource} : undef;
        my $target_params = ref $resource eq 'HASH' ? $resource->{TargetsParams} : undef;
        my %funnel;
        if (ref $target_params eq 'HASH') {
            for my $params (values %$target_params) {
                next unless ref $params eq 'HASH';
                next unless defined $params->{target_funnel};
                $funnel{ $params->{target_funnel} }++;
            }
        }
        $seen{target_funnels} = [ sort keys %funnel ];
        $seen{adv_types} = [ get_perf_adv_types($task_data->{file}{expt_send}) ];
    } else {
        $seen{adv_types} = [ get_dyn_adv_types($task_data->{file}{'task.err'}) ];
        # Exactly one source type per task; IsFeed wins over IsSpecURL.
        my $src_type;
        if ($task_data->{info}{IsFeed}) {
            $src_type = 'feedurl';
        } elsif ($task_data->{info}{IsSpecURL}) {
            $src_type = 'specurl';
        } else {
            $src_type = 'domain';
        }
        $seen{src_types} = [ $src_type ];
    }

    return \%seen;
}

# Returns the TaskIDs of recently finished "fast" tasks, one per domain.
#
# Arguments:
#   $proj         - project object providing bannerland_dbh
#   $task_type    - 'perf' reads PerfTasks, anything else DynTasks
#   $count        - maximum number of ids to return (default 100)
#   $max_interval - SQL interval added to `begin` and compared with `end`
#                   (default "interval 1 hour")
#   $res_range    - [min, max] bounds for resultcount (default [30,3000])
#
# Rows are ordered by `end` descending, so for every domain the first row in
# that order is kept.  Rows without a domain_dir_name are dropped.
sub get_fast_tasks {
    my ($proj, $task_type, $count, $max_interval, $res_range) = @_;
    $count        //= 100;
    $max_interval //= "interval 1 hour";
    $res_range    //= [30,3000];

    my $table = 'DynTasks';
    $table = 'PerfTasks' if $task_type eq 'perf';
    my ($min_results, $max_results) = @{$res_range}[0, 1];

    my $sql = "
        select domain_dir_name, TaskID
        from $table
        where begin + $max_interval > end
            and resultcount >= $min_results
            and resultcount <= $max_results
        order by end desc
    ";
    my $rows = $proj->bannerland_dbh->List_SQL($sql);

    # Drop duplicates by domain, keeping the first row of each domain.
    my (%domain_taken, @ids);
    foreach my $row (@$rows) {
        my $domain = $row->{domain_dir_name} or next;
        next if $domain_taken{$domain}++;
        push @ids, $row->{TaskID};
    }

    return splice(@ids, 0, $count);
}

# Tells whether at least one key listed in the array ref $candidates has a
# false (missing or zero) counter in the hash ref $known.
# Returns 1 if so, 0 otherwise.
sub _adds_new {
    my ($known, $candidates) = @_;

    my $found_new = 0;
    foreach my $name (@$candidates) {
        next if $known->{$name};
        $found_new = 1;
        last;
    }

    return $found_new;
}

# Increments the counter in the hash ref $counters for every key listed in
# the array ref $names (a key listed twice is counted twice).
# Returns the same hash ref.
sub _add {
    my ($counters, $names) = @_;

    $counters->{$_}++ foreach @$names;

    return $counters;
}

# Greedily selects tasks that extend coverage.
#
# Arguments:
#   $proj      - project object
#   $task_type - 'perf' or anything else (treated as 'dyn')
#   $try_count - maximum number of candidate tasks to examine
#
# Candidates come from get_fast_tasks.  A task is kept when it exercises at
# least one tracked entity whose counter is still zero; the counters are then
# increased for everything the task exercises.  The scan stops early once no
# counter is zero any more.
#
# Returns { task_data => [selected task hashes, each with an extra `add`
# key], seen => counters per entity kind }.
sub find_interesting_tasks {
    my ($proj, $task_type, $try_count) = @_;

    # Entities used to select tasks; every known one starts with a zero counter.
    my %seen;
    $seen{adv_types}{$_} = 0 foreach get_adv_types_list($proj);
    my ($extra_kind, @extra_names) = ($task_type eq 'perf')
        ? ('target_funnels', qw/product_page_visit same_products new_auditory/)
        : ('src_types',      qw(feedurl specurl domain));
    $seen{$extra_kind}{$_} = 0 foreach @extra_names;

    my @candidates = get_fast_tasks($proj, $task_type, $try_count);
    my $total = scalar @candidates;
    my ($tried, $accepted) = (0, 0);
    my @picked;

    TASK:
    foreach my $task_id (@candidates) {
        $tried++;
        $proj->log("Try task $task_id ... ($tried of $total; added: $accepted)");

        my $task_data = get_task_data($proj, $task_type, $task_id)
            or next TASK;

        my $add = analyze_task_data($proj, $task_type, $task_data)
            or next TASK;

        # Number of entity kinds for which this task brings something new.
        my $new_kinds = grep { _adds_new($seen{$_}, $add->{$_}) } keys %seen;
        unless ($new_kinds) {
            $proj->log("Task $task_id does not add");
            next TASK;
        }

        $accepted++;
        $proj->log("Task $task_id adds!");

        _add($seen{$_}, $add->{$_}) foreach keys %seen;

        $proj->log("Current stats: " . Dumper(\%seen));
        $task_data->{add} = $add;
        push @picked, $task_data;

        # Stop as soon as every tracked entity has been seen at least once.
        my @uncovered = grep { $_ == 0 } map { values %$_ } values %seen;
        last TASK unless @uncovered;
    }

    return {
        task_data => \@picked,
        seen => \%seen,
    };
}
