#!/usr/bin/perl

=head1 DEPLOY

# approved by lena-san,ppalex
# .migr
{
  type => 'script',
  when => 'after',
  time_estimate => "несколько минут",
  comment => "можно перезапускать"
}

=cut

=pod

    Как получен файл с данными:
    /mnt/remote-log-rfs$ grep --no-filename -E 'delete_agency_nds|delete_client_nds' ppcscripts*.yandex.ru/protected/balanceGetClientNDSDiscountSchedule.log.201708{29,30} | sort > ~/20170830_restore_nds_graph.data

    пример
    2017-08-29      19:36:31        [delete_client_nds]     {"ClientID":"3137648","date_to":"20031231","date_from":"20030101","nds":"0.00"}

    Что делает миграция - проверяет, нет ли соответствующего графика уже в табличке. если есть - пропускает, если нет - пишет в табличку данные из файла
    если данные есть, но не совпадают - логирует как "GRAPHS ARE DIFFERENT".
    ОСТОРОЖНО - так можно записать разные данные agency_nds в разных шардах

=cut

use Direct::Modern;

use JSON;
use Path::Tiny;
use List::MoreUtils qw/uniq/;

use Yandex::DBTools;
use Yandex::DBShards;

use my_inc '..';

use Client::NDSDiscountSchedule;
use ScriptHelper;
use Settings;
use ShardingTools;

$log->out('START');

# Data file with the deleted NDS schedule rows extracted from the logs
# (see the POD above for how it was produced).
my $data_path = my_inc::path('./20170830_restore_nds_graph.data');

# Path::Tiny reports I/O failures by throwing an exception rather than by
# returning false, so the error must be caught to be routed through $log.
my @lines;
eval {
    @lines = path($data_path)->lines({ chomp => 1 });
    1;
} or $log->die("Can't open data file $data_path: $@");

# A readable but empty file means there is nothing to restore - most likely
# the wrong file was deployed, so stop instead of silently doing nothing.
$log->die("Data file $data_path is empty") unless @lines;

# ClientID => [ rows from the log ], one hash per target table.
my %agency_nds;
my %client_nds;

for my $line (@lines) {
    # Tab-separated columns: date, time, [prefix], JSON payload.
    my (undef, undef, $prefix, $json) = split(qr/\t/, $line, 4);
    if (!defined($prefix) || !defined($json)) {
        $log->die("malformed line '$line'");
    }

    # Report the offending line if the payload is not valid JSON.
    my $row = eval { from_json($json) };
    if (!defined $row) {
        $log->die("can't parse JSON in line '$line': $@");
    }
    if (ref($row) ne 'HASH' || !defined $row->{ClientID}) {
        $log->die("no ClientID in line '$line'");
    }

    # The prefix tells which table the row was deleted from.
    if ($prefix eq '[delete_client_nds]') {
        push @{ $client_nds{ $row->{ClientID} } }, $row;
    } elsif ($prefix eq '[delete_agency_nds]') {
        push @{ $agency_nds{ $row->{ClientID} } }, $row;
    } else {
        $log->die("unexpected prefix in line '$line'");
    }
}

$log->out("process client_nds");

# Columns written to the *_nds tables, in the order used by the INSERTs.
my @fields = qw/ClientID date_from date_to nds/;
my $fields_str = join ',', map {sql_quote_identifier($_)} @fields;

# Restore client_nds. The callback is invoked with a shard and the chunk of
# ClientIDs to handle in that shard (as unpacked from @_ below).
foreach_shard("ClientID", [keys %client_nds], sub {
    my ($shard, $clid_chunk) = @_;

    # Tag log messages with the shard (presumably while $guard is alive).
    my $guard = $log->msg_prefix_guard("[shard_$shard]");
    $log->out("processing shard");

    # precheck: collect what the table already holds for these clients
    my %by_client;
    for my $row (@{ get_all_sql(PPC(shard => $shard), ["SELECT * FROM client_nds", WHERE => { ClientID => $clid_chunk } ]) }) {
        # normalize the DB row to the same form as the rows from the log
        my $converted = Client::NDSDiscountSchedule::_deserialize_nds_row(Client::NDSDiscountSchedule::_serialize_nds_row($row));
        push @{ $by_client{ $row->{ClientID} } }, $converted;
    }

    my @to_insert;
    for my $clientid (@$clid_chunk) {
        # De-duplicate the logged rows through their serialized form and put
        # them into a stable (string-sorted) order.
        my @candidate_rows = map {
                Client::NDSDiscountSchedule::_deserialize_nds_row($_)
            } sort {
                $a cmp $b
            } uniq ( map { Client::NDSDiscountSchedule::_serialize_nds_row($_) } @{ $client_nds{$clientid} } );

        if (exists $by_client{$clientid}) {
            # The client already has a schedule in the table: never overwrite
            # it, only report (and flag a mismatch with the logged data).
            my $msg = {table => "client_nds", SKIP => \@candidate_rows, db_data => $by_client{$clientid}};

            # The SELECT above has no ORDER BY, so the DB rows arrive in an
            # unspecified order. Sort both sides the same way before joining,
            # otherwise equal schedules could be reported as different.
            my $db_str = join("_",
                sort { $a cmp $b }
                map { Client::NDSDiscountSchedule::_serialize_nds_row($_) } @{ $by_client{$clientid} }
            );
            my $log_str = join("_",
                sort { $a cmp $b }
                map { Client::NDSDiscountSchedule::_serialize_nds_row($_) } @candidate_rows
            );
            if ($db_str ne $log_str) {
                $msg->{ERROR} = "GRAPHS ARE DIFFERENT";
            }
            $log->out($msg);
        } else {
            # Nothing in the table for this client: restore every logged row.
            for my $row (@candidate_rows) {
                $log->out({table => "client_nds", insert => $row});
                push @to_insert, [ @{$row}{@fields} ];
            }
        }
    }
    my $cnt = scalar(@to_insert);
    $log->out("going to insert new data to client_nds: $cnt rows");
    $log->out({table => "client_nds", data_to_insert => \@to_insert});

    # INSERT IGNORE: rows that collide with existing keys are skipped by the
    # database instead of failing the whole statement.
    my $res = do_mass_insert_sql(PPC(shard => $shard), "INSERT IGNORE INTO client_nds ($fields_str) VALUES %s", \@to_insert);
    $log->out("affected $res rows");
});

$log->out("process agency_nds");

# Agency rows are checked and restored in every shard (unlike client_nds,
# which is routed per shard above), so the ID list is the same for each
# iteration and is built once.
my @agency_ids = keys %agency_nds;

for my $shard (ppc_shards()) {
    # Tag log messages with the shard (presumably while $guard is alive).
    my $guard = $log->msg_prefix_guard("[shard_$shard]");
    $log->out("processing shard");

    # precheck: collect what this shard's table already holds
    my %by_client;
    for my $row (@{ get_all_sql(PPC(shard => $shard), ["SELECT * FROM agency_nds", WHERE => { ClientID => \@agency_ids } ]) }) {
        # normalize the DB row to the same form as the rows from the log
        my $converted = Client::NDSDiscountSchedule::_deserialize_nds_row(Client::NDSDiscountSchedule::_serialize_nds_row($row));
        push @{ $by_client{ $row->{ClientID} } }, $converted;
    }

    my @to_insert;
    for my $clientid (@agency_ids) {
        # De-duplicate the logged rows through their serialized form and put
        # them into a stable (string-sorted) order.
        my @candidate_rows = map {
                Client::NDSDiscountSchedule::_deserialize_nds_row($_)
            } sort {
                $a cmp $b
            } uniq ( map { Client::NDSDiscountSchedule::_serialize_nds_row($_) } @{ $agency_nds{$clientid} } );

        if (exists $by_client{$clientid}) {
            # The agency already has a schedule in this shard: never overwrite
            # it, only report (and flag a mismatch with the logged data).
            my $msg = {table => "agency_nds", SKIP => \@candidate_rows, db_data => $by_client{$clientid}};

            # The SELECT above has no ORDER BY, so the DB rows arrive in an
            # unspecified order. Sort both sides the same way before joining,
            # otherwise equal schedules could be reported as different.
            my $db_str = join("_",
                sort { $a cmp $b }
                map { Client::NDSDiscountSchedule::_serialize_nds_row($_) } @{ $by_client{$clientid} }
            );
            my $log_str = join("_",
                sort { $a cmp $b }
                map { Client::NDSDiscountSchedule::_serialize_nds_row($_) } @candidate_rows
            );
            if ($db_str ne $log_str) {
                $msg->{ERROR} = "GRAPHS ARE DIFFERENT";
            }
            $log->out($msg);
        } else {
            # Nothing in this shard for the agency: restore every logged row.
            for my $row (@candidate_rows) {
                $log->out({table => "agency_nds", insert => $row});
                push @to_insert, [ @{$row}{@fields} ];
            }
        }
    }
    my $cnt = scalar(@to_insert);
    $log->out("going to insert new data to agency_nds: $cnt rows");
    $log->out({table => "agency_nds", data_to_insert => \@to_insert});

    # INSERT IGNORE: rows that collide with existing keys are skipped by the
    # database instead of failing the whole statement.
    my $res = do_mass_insert_sql(PPC(shard => $shard), "INSERT IGNORE INTO agency_nds ($fields_str) VALUES %s", \@to_insert);
    $log->out("affected $res rows");

}

$log->out('FINISH');

