#!/usr/bin/env perl

=head1 SYNOPSIS

    ./63380-get-minusWords.pl

=head1 DESCRIPTION

    Скрипт для выгрузки минус-фраз на кампанию/группу
    https://st.yandex-team.ru/DIRECT-63380


=cut

use constant SELECT_CHUNK_SIZE => 50000;

use my_inc '../..';

use Direct::Modern;
use Direct::Validation::MinusWords;
use MinusWordsTools;

use Yandex::DBTools qw/get_all_sql/;

use Encode qw/decode_utf8 decode/;
use JSON;
use Settings;
use ScriptHelper;
use ShardingTools qw/ppc_shards/;

$log->out('START');

# Output files (relative paths, so they are created in the current working directory).
my $campaigns_output_file = 'campaigns-minusWords.json';
my $groups_output_file = 'groups-minusWords.json';
# Maximum number of rows requested by a single SELECT.
my $LIMIT = SELECT_CHUNK_SIZE;

# $! is included so that a failure reports the OS-level reason (permissions, missing dir, ...).
open(my $fhc, ">", $campaigns_output_file) or $log->die("Can't open file $campaigns_output_file: $!");
open(my $fhg, ">", $groups_output_file) or $log->die("Can't open file $groups_output_file: $!");

# Result records accumulated across all shards; written out after the shard loop.
my @camp_minus_words_result_arr;
my @group_minus_words_result_arr;

# Runs one paginated query against one shard and appends a validated result
# record for every fetched row to the given array.
#
# Named parameters:
#   shard      - shard number passed to PPC()
#   sql        - SELECT with two placeholders (last seen id, chunk size); it must
#                return rows ordered by the id column for the pagination to work
#   id_field   - name of the id column in the fetched rows; also used as the id
#                key of the result record
#   text_field - name of the column holding the minus-words string
#   validator  - code ref called with the parsed minus-words; the returned object
#                is asked for ->is_valid
#   result     - array ref that receives {<id_field>, is_valid, minus_keywords} hashes
#
# Returns nothing.
sub _collect_minus_words {
    my (%opt) = @_;

    my ($id_field, $text_field) = @opt{qw/id_field text_field/};
    my $last_id = 0;

    while (1) {
        my $rows = get_all_sql(PPC(shard => $opt{shard}), $opt{sql}, $last_id, $LIMIT);
        # An empty chunk means everything has been read.
        last unless $rows && @$rows;

        for my $row (@$rows) {
            my $minus_words = MinusWordsTools::minus_words_str2array($row->{$text_field});
            my $validation_result = $opt{validator}->($minus_words);
            my $valid = $validation_result->is_valid ? JSON::true : JSON::false;
            push @{ $opt{result} }, {$id_field => $row->{$id_field}, is_valid => $valid, minus_keywords => $minus_words};
        }

        # Keyset pagination: the next chunk starts right after the largest id seen so far.
        $last_id = $rows->[-1]{$id_field};
    }

    return;
}

for my $shard (ppc_shards()) {
#    $log->msg_prefix("[shard$shard]");

    # Export campaign-level minus-words.
    _collect_minus_words(
        shard      => $shard,
        sql        => 'SELECT cid, minus_words
                                                      FROM ppc.camp_options
                                                      WHERE cid > ?
                                                      AND minus_words IS NOT NULL
                                                      order by cid LIMIT ?',
        id_field   => 'cid',
        text_field => 'minus_words',
        validator  => \&Direct::Validation::MinusWords::validate_campaign_minus_words,
        result     => \@camp_minus_words_result_arr,
    );

    # Export ad-group-level minus-words.
    _collect_minus_words(
        shard      => $shard,
        sql        => 'SELECT p.pid, mw.mw_text
                                                                FROM ppc.phrases p
                                                                JOIN ppc.minus_words mw ON p.mw_id=mw.mw_id
                                                                WHERE p.pid > ?
                                                                order by p.pid LIMIT ?',
        id_field   => 'pid',
        text_field => 'mw_text',
        validator  => \&Direct::Validation::MinusWords::validate_group_minus_words,
        result     => \@group_minus_words_result_arr,
    );
}

# Write the collected records, one canonical (sorted-key) JSON object per line,
# encoded as UTF-8.
#
# The two loops are deliberately independent: previously the group loop was nested
# inside the campaign `if`, so group records were dropped whenever no campaign
# records had been collected. The line terminator is a plain "\n"; the former
# join("\t", ..., "\n") left a stray tab at the end of every line.
for my $camp (@camp_minus_words_result_arr) {
    print {$fhc} Encode::encode_utf8(to_json($camp, {canonical => 1})), "\n";
}

for my $group (@group_minus_words_result_arr) {
    print {$fhg} Encode::encode_utf8(to_json($group, {canonical => 1})), "\n";
}

# Reset the log prefix before the final message.
$log->msg_prefix('');

# Output is buffered, so a write error (e.g. disk full) may only surface at close();
# check it so that a truncated file is not reported as a successful run.
close($fhc) or $log->die("Can't close file $campaigns_output_file: $!");
close($fhg) or $log->die("Can't close file $groups_output_file: $!");

$log->out('FINISH');
