#!/usr/bin/perl

use strict;
use warnings;

use open ':utf8';

use JSON qw(from_json);
use Getopt::Long;
use Parallel::ForkManager;
use File::Slurp qw(write_file);

use FindBin;
use lib "$FindBin::Bin/../../lib";

use Utils::Sys qw(
    save_json load_json
    dir_files lines_count
    handle_errors
);
use BM::BannersMaker::Tasks::TaskUtils qw(get_task_file);

use Project;

# Install the project's error handling; the DIE handler is configured with
# stack_trace => 1.
handle_errors(DIE => {stack_trace => 1});

# Command-line options: --help / -h and --conf=<file>.
my %opt;
GetOptions(\%opt, 'help|h', 'conf=s');

my $bin_dir = $FindBin::Bin;
my $default_conf = "$bin_dir/test.conf";

if ($opt{help}) {
    # Use print, not printf: these strings interpolate $0 and a file path, and
    # a literal '%' in either one would be parsed as a format directive.
    print "Usage: $0 [Options] [files ...]\n";
    print "Generate fake feed files for fixtures in conf\n";
    print "Options:\n";
    print "  --conf          config file with task list\n";
    print "                    default: $default_conf\n";
    exit;
}

# Fall back to test.conf located next to this script.
$opt{conf} //= $default_conf;

# The project object is created before any fork, so every child process
# gets its own copy of it.
my $proj = Project->new({
    load_dicts => 1,
    load_minicategs_light => 1,
    use_comptrie_subphraser => 1,
});

# Run at most 8 child processes at a time.
my $pm = Parallel::ForkManager->new(8);

# The config is parsed in relaxed mode; the main loop reads
# $conf->{<type>}{fixtures} (a list of task ids) from it.
my $conf = load_json($opt{conf}, { relaxed => 1})
    or die "Can't load config file `$opt{conf}'";

# Fixture kinds to process. Each one is both a subdirectory name under
# fixtures/ and a top-level key of the config.
my @types = qw(perf dyn);
for my $type (@types) {
    my $dir = "$bin_dir/fixtures/${type}";
    my @task_ids = @{$conf->{$type}{fixtures} // []};
    for my $task_id (@task_ids) {
        # One child process per task. In the parent start() returns a true
        # value, so the parent moves on to the next task; the child falls
        # through and does the work below, then calls finish().
        $pm->start() and next;
        my $task_dir = "$dir/$task_id";
        mkdir $task_dir if !-d $task_dir;

        # Fetch the task description only if it is not already stored in
        # the fixture directory.
        my $task_file = "$task_dir/taskjson";
        if (!-f $task_file) {
            get_task_file($proj, $type, $task_id, $task_file);
        }
        my $task_inf = load_json($task_file);
        my $task_obj;
        if ($type eq 'perf') {
            $task_obj = $proj->perftask($task_inf);
        } else {
            $task_obj = $proj->dyntask($task_inf);
        }
        $proj->log("processing $type task $task_id from file `$task_file' ...");

        if ($type eq 'perf') {
            my $fd = $task_obj->get_feed_nofilters;
            save_feed_to_dir($fd, $task_dir, num_lines => 200);
        } else {
            # see get_feeddata
            # Sample sizes (in lines) for the different dyn sources.
            my $num_lines = 300;
            my $dse_lines = 200;
            my $specurl_lines = 100;
            my $feedurl_lines = 500;

            my $product_domain = $task_obj->domain;
            if (keys %{$task_obj->filters}) {
                if ($task_obj->feedurl) {
                    $proj->log("feedurl found");
                    my $offers_feed = $proj->feed({
                        url => $task_obj->feedurl,
                        filters => $task_obj->filters,  # needed for first_product_url
                        $task_inf->{Resource}{Login} ? ( login => $task_inf->{Resource}{Login} ) : (),
                        $task_inf->{Resource}{Password} ? ( pass => $task_inf->{Resource}{Password} ) : (),
                        # feed_src_label is only used during generation
                    });
                    save_feed_to_dir($offers_feed, $task_dir, num_lines => $feedurl_lines);

                    # If the feed yields a first product URL with a domain,
                    # that domain replaces the task's own domain for the
                    # source lookups below.
                    $offers_feed->iter_init;
                    my $first_product_url = $offers_feed->get_first_product_url;
                    if ($first_product_url) {
                        my $domain = $proj->page($first_product_url)->domain;
                        $product_domain = $domain if $domain;
                    }
                } else {
                    # feed from the crawler
                    $proj->log("renew_tskv");
                    $task_obj->renew_tskv;
                    my $file_tskv_gen = $task_obj->filenames->{"tskv_gen"};

                    # First a sample of dse lines (truncating the output
                    # file), then a sample of all other lines appended to it.
                    # Both commands are run with no_die => 1.
                    $proj->do_sys_cmd(
                        "cat $file_tskv_gen | grep ^product_type=dse | shuf -n $dse_lines > $task_dir/tskv_gen",
                        no_die => 1,
                    );
                    $proj->do_sys_cmd(
                        "cat $file_tskv_gen | grep ^product_type=dse -v | shuf -n $num_lines >> $task_dir/tskv_gen",
                        no_die => 1,
                    );
                }
            }

            # For every specurl entry: take its inline data out of the entry,
            # keep the first $specurl_lines lines, write them to a separate
            # file, and record only that file's name in the entry.
            my @specurl_offers_inf = $task_obj->specurls_offers_fd(ret_inf => 1);
            if (@specurl_offers_inf) {
                $proj->log("specurls found");
                my $idx = 0;
                for my $inf (@specurl_offers_inf) {
                    my @lines = split /\n/, (delete $inf->{data});
                    my $data = join('', map { $_."\n" } splice(@lines, 0, $specurl_lines));
                    $inf->{data_file_name} = "specurl-data-$idx";
                    write_file("$task_dir/$inf->{data_file_name}", {binmode => 'utf8'}, $data);
                    # Advance the counter so each entry gets its own file;
                    # without this every entry would be written to
                    # specurl-data-0 and overwrite the previous one.
                    $idx++;
                }
                save_json(\@specurl_offers_inf, "$task_dir/specurl_offers_inf");
            }

            # feed from dse
            if ($task_obj->feedurl || @specurl_offers_inf) {
                my $dse_options = $proj->options->{DynSources}->{dse};
                my $dse_feed_filename = $task_obj->get_source_file(
                    name             => 'source_dse',
                    yt_path          => $dse_options->{yt_path_domain},
                    sec_level_domain => $task_obj->get_source_key($product_domain),
                    add_str          => "product_type=dse\t",
                );
                # Only write a sample when the source file is non-empty.
                my $c = $proj->file($dse_feed_filename)->wc_l;
                if ($c > 0) {
                    $proj->log("dse found, lines: $c");
                    $proj->do_sys_cmd("shuf -n $dse_lines $dse_feed_filename > $task_dir/dse_offers_tskv");
                }
            }

            # external_sources
            my $ext_lines = 200;
            for my $source (@{$proj->options->{DynSources}{inclusion_params}}) {
                my $filename = $source->{filename};
                my $source_file = $task_obj->get_source_file(
                    name => $filename,
                    yt_path => $source->{table}.'{url,phrases}',
                    sec_level_domain => $task_obj->get_source_key($product_domain),
                );
                # Only write a sample when the source file is non-empty.
                my $c = $proj->file($source_file)->wc_l;
                if ($c > 0) {
                    $proj->log("ext-source $filename found, lines: $c");
                    $proj->do_sys_cmd("shuf -n $ext_lines $source_file > $task_dir/ext-$filename");
                }
            }
        }

        # End of the child's work for this task.
        $pm->finish;
    }
}

# Block until every forked task has finished.
$pm->wait_all_children();

$proj->log("all done!");
exit(0);


# Store a fixture-sized copy of a feed's "light" tskv files in a directory.
#
# Arguments:
#   $fd   - feed object; its fds must provide feed_file_type,
#           offers_tskv_light_file and categs_tskv_light_file
#   $dir  - destination directory
#   %par  - named options:
#             num_lines - number of offer lines to sample (default: 200)
#
# Writes "$dir/offers_tskv_light_file" (a random sample made with shuf) and
# "$dir/categs_tskv_light_file" (a full copy made with cp).
# Dies with "Bad feed_file_type!" unless the feed file type is yml or xml.
# Logging and shell commands go through the file-level $proj object.
sub save_feed_to_dir {
    my ($fd, $dir, %par) = @_;

    # Other feed types would need a different offer_tag :(
    die "Bad feed_file_type!"
        unless $fd->fds->feed_file_type =~ /^(?:yml|xml)$/;

    my $sample_size = defined $par{num_lines} ? $par{num_lines} : 200;

    # Offers: in perf we take a share of the unfiltered tskv.
    my $offers_src = $fd->fds->offers_tskv_light_file;
    my $offers_dst = "$dir/offers_tskv_light_file";
    my $offers_total = lines_count($offers_src);
    $proj->log("offers_tskv_light_file: $offers_src, lines: $offers_total");
    $proj->do_sys_cmd("shuf -n $sample_size $offers_src > $offers_dst");

    # Categories are copied in full.
    my $categs_src = $fd->fds->categs_tskv_light_file;
    my $categs_dst = "$dir/categs_tskv_light_file";
    $proj->do_sys_cmd("cp $categs_src $categs_dst");
}

