#!/usr/bin/perl

=head1 DEPLOY

# .migr
{
  approved_by => 'ppalex',
  tasks => [
    {
      type => 'script',
      when => 'after',
      time_estimate => "несколько часов (~9)",
      comment => "Миграцию можно перезапускать, прогресс сохраняется в property.
      На ТС можно запустить с параметром --row_num=4500000 --shard=1"
    }
  ]
}

=cut

use Direct::Modern;

use Yandex::DBTools;
use Yandex::YT::Table;

use my_inc '..';

use Tools;
use ScriptHelper;
use ShardingTools;
use Settings;

# Pin the YT environment to "hahn" before any table object is created.
Tools::force_set_yt_environment("hahn");

# A chunk is flushed once this many distinct cids have been accumulated.
my $MAX_CID_CNT_TO_DELETE = 20;

# The per-shard YT table path is built as PREFIX . shard . SUFFIX.
my $TABLE_PATH_PREFIX = '//home/direct/tmp/evgetv/bids_href_params_to_delete_shard_';
my $TABLE_PATH_SUFFIX = '_3';

# Appended to the per-shard script name to form the progress property name.
my $ROW_PROP_SUFFIX = '_row_num';

# Command-line options:
#   --shard=N          process only shard N
#   --from-beginning   ignore any saved progress and start from row 1
#   --row_num=N        start from row N (1-based) instead of the saved progress
my ($only_shard, $from_beg, $from_row_num);

extract_script_params(
    'shard=i'        => \$only_shard,
    'from-beginning' => \$from_beg,
    'row_num=i'      => \$from_row_num,
);

$log->out('START');

foreach my $shard (ppc_shards()) {

    # --shard restricts the run to a single shard.
    if (defined($only_shard) && $only_shard != $shard) {
        next;
    }

    # Progress (1-based number of the next row to read) is stored per shard in a
    # property, so an interrupted run can be restarted where it stopped.
    my $row_prop = Property->new(get_script_name(shardid => $shard).$ROW_PROP_SUFFIX);
    my $row_num = $row_prop->get();

    # An explicit --row_num overrides the saved progress ...
    if ($from_row_num) {
        $row_num = $from_row_num;
    }

    # ... and --from-beginning (or no saved progress at all) restarts from row 1.
    if ($from_beg || !defined($row_num)) {
        $row_num = 1;
    }

    my $table_path = $TABLE_PATH_PREFIX."$shard".$TABLE_PATH_SUFFIX;

    # The "[#N:]" suffix makes the reader start at zero-based row N.
    my $table = Yandex::YT::Table->new($table_path.'[#'.($row_num - 1).":]");
    my $reader = $table->reader();

    # row_count is requested through the plain path because get_attribute
    # cannot handle the row-range suffix at the end of the path.
    my $rows_cnt = Yandex::YT::Table->new($table_path)->get_attribute('row_count');

    my $total_cnt = 0;

    # cid => [ ids read from the YT table ], accumulated until the next flush.
    my %to_delete = ();
    my $prev_cid = -1;

    while (my $r = $reader->next()) {

        push @{$to_delete{$r->{cid}}}, $r->{id};

        # The table is sorted by cid. Flush on the last row of the table, or as
        # soon as a new cid brings the number of accumulated cids up to the limit.
        if ($row_num == $rows_cnt || ($r->{cid} != $prev_cid && scalar keys %to_delete == $MAX_CID_CNT_TO_DELETE)) {

            $log->msg_prefix("[shard=$shard, row_num $row_num / $rows_cnt]");
            $total_cnt += _delete_chunk($shard, \%to_delete);

            %to_delete = ();
            # Everything up to and including this row is done; resume from the next one.
            $row_prop->set($row_num + 1);
        }

        $prev_cid = $r->{cid};
        $row_num += 1;
    }

    # The reader may end without the "last row" condition above ever firing
    # (for example when row_count does not match the rows actually read).
    # Flush whatever is still pending so that those ids are not silently dropped
    # before the progress property is removed.
    if (%to_delete) {
        my $last_row_num = $row_num - 1;
        $log->msg_prefix("[shard=$shard, row_num $last_row_num / $rows_cnt]");
        $total_cnt += _delete_chunk($shard, \%to_delete);
        %to_delete = ();
    }

    $log->msg_prefix("[shard=$shard]");
    $log->out("DONE, total deleted count: $total_cnt");

    # The shard is finished, so the saved progress is no longer needed.
    $row_prop->delete();
}

$log->out('FINISH');

# Deletes the bids_href_params rows for one accumulated chunk.
#
# Parameters:
#   $shard     - shard number the queries are run against
#   $to_delete - hashref: cid => arrayref of ids taken from the YT table
#
# Returns the number of rows deleted (0 when nothing was eligible).
#
# An id that is still present in bids or bids_arc for the chunk's cids is
# logged as wrong data and is NOT deleted.
sub _delete_chunk {
    my ($shard, $to_delete) = @_;

    $log->out("before delete next chunk");

    # Select every id that currently exists for the accumulated cids.
    my $cids = [ keys %$to_delete ];
    my $good_ids = get_hash_sql(PPC(shard => $shard), [
        "SELECT id FROM bids", where => { cid => $cids },
        "UNION ALL SELECT id FROM bids_arc", where => { cid => $cids } ]);

    # Split the candidate ids: those still found among the selected ids are
    # suspicious, the rest are approved for deletion.
    my @approved_ids;
    my @wrong_data;
    foreach my $cid (keys %$to_delete) {
        foreach my $id (@{ $to_delete->{$cid} }) {
            if (exists($good_ids->{$id})) {
                push @wrong_data, {id => $id, cid => $cid};
            } else {
                push @approved_ids, $id;
            }
        }
    }

    if (scalar @wrong_data > 0) {
        $log->out(["!!! WRONG DATA:", \@wrong_data]);
    }

    # Check the number of approved ids, not the truthiness of an array reference.
    if (!@approved_ids) {
        $log->out("chunk has no ids to delete");
        return 0;
    }

    my %where = (
        id  => \@approved_ids,
        cid => $cids,
    );

    # Log the full rows before deleting them so that they can be found in the log later.
    my $to_delete_data = get_all_sql(PPC(shard => $shard),
        ["SELECT * FROM bids_href_params", where => \%where]);

    $log->out("to delete:", $to_delete_data);
    my $cnt = do_delete_from_table(PPC(shard => $shard), 'bids_href_params', where => \%where);
    $log->out("deleted $cnt rows");

    return $cnt;
}
