#!/usr/bin/perl

use my_inc '..';

=head1 DEPLOY

# approved by zhur
# .migr
{
  type => 'script',
  when => 'after',
  time_estimate => '1-2 часа',
  comment => 'Чистим таблицы от записей о несуществующих клиентах',
}

=cut

use Direct::Modern;

use Yandex::DBTools;
use Yandex::ListUtils;
use Yandex::HashUtils;

use Settings;
use ScriptHelper;
use ShardingTools;

# Tables keyed by ClientID that are scanned for rows whose ClientID has no
# matching row in `users`.
my @TABLES = qw(
    account_score
    client_discounts
    client_domains
    client_domains_stripped
    client_firm_country_currency
    client_nds
    clients_autoban
    clients_geo_ip
    clients_options
    clients_to_fetch_nds
    clients_to_force_multicurrency_teaser
    force_currency_convert
);

$log->out('START');

# Width of the ClientID window examined by a single SELECT.
my $CLIENTID_SELECT_CHUNK_SIZE = 100_000;
# Maximum number of ClientIDs passed to a single DELETE.
my $DELETE_CHUNK_SIZE = 1_000;

# For every shard and every table: walk the ClientID range in windows, find
# rows whose ClientID is absent from `users`, leave out clients that have an
# unfinished entry in reshard_queue, log the rows, and delete them from the
# table being processed.
for my $shard (ppc_shards()) {
    for my $table (@TABLES) {
        # Keep the guard in a lexical so it lives until the end of this iteration
        # (presumably the prefix is applied to log lines while the guard exists).
        my $log_prefix_guard = $log->msg_prefix_guard("[shard $shard]\t[table $table]");

        my ($min_clientid, $max_clientid) = get_one_line_array_sql(PPC(shard => $shard), "SELECT MIN(ClientID), MAX(ClientID) FROM $table");

        # MIN()/MAX() yield NULL (undef) when there are no rows to aggregate.
        # Remember that before the values are defaulted to 0 for logging:
        # comparing min with max instead would also skip a table whose rows all
        # share a single ClientID.
        my $table_has_clientids = defined $min_clientid && defined $max_clientid;
        $min_clientid //= 0;
        $max_clientid //= 0;
        $log->out("MIN(ClientID) = $min_clientid, MAX(ClientID) = $max_clientid");
        if (!$table_has_clientids) {
            $log->out('Nothing to process, skipping');
            next;
        }

        # The window [begin, end] is inclusive on both sides (it is used in a
        # BETWEEN condition), hence the "+ 1" step so windows do not overlap.
        for (my $begin_clientid = $min_clientid; $begin_clientid <= $max_clientid; $begin_clientid += $CLIENTID_SELECT_CHUNK_SIZE + 1) {
            my $end_clientid = $begin_clientid + $CLIENTID_SELECT_CHUNK_SIZE;
            $log->out("Processing ClientIDs from $begin_clientid to $end_clientid");

            $log->out('Fetching records linked to non-existing ClientIDs');
            # Anti-join: keep only rows of $table that found no partner in users.
            my $data = get_all_sql(PPC(shard => $shard), ["
                SELECT t.ClientID, t.*
                FROM $table t
                LEFT JOIN users u ON u.ClientID = t.ClientID",
                WHERE => {
                    'u.ClientID__is_null' => 1,
                    't.ClientID__between' => [$begin_clientid, $end_clientid],
                }
            ]);
            $log->out('Got ' . scalar(@$data) . ' rows of data');

            if (@$data) {
                # Group the fetched rows by ClientID so they can be logged
                # before they are removed.
                my %clid2data;
                for my $row (@$data) {
                    push @{ $clid2data{ $row->{ClientID} } ||= [] }, $row;
                }

                my $clientids = [ keys %clid2data ];
                $log->out('Got ' . scalar(@$clientids) . ' records to delete');

                $log->out('Checking if clients are not resharding right now');
                my $resharding_clientids = get_one_column_sql(PPCDICT, ['SELECT ClientID FROM reshard_queue', WHERE => { ClientID => $clientids, status__ne => 'done' }]);
                $log->out(scalar(@$resharding_clientids) . ' clients from selected are resharding right now, skipping:', $resharding_clientids);

                my $clientids_to_delete = xminus($clientids, $resharding_clientids);
                $log->out('Going to delete ' . scalar(@$clientids_to_delete) . ' ClientIDs from DB');
                # Dump the full rows that are about to be deleted into the log.
                $log->out({ data_to_delete => [xflatten(values %{ hash_cut(\%clid2data, $clientids_to_delete) })] });

                for my $clientids_chunk (chunks $clientids_to_delete, $DELETE_CHUNK_SIZE) {
                    $log->out('Removing records chunks with ClientIDs:', $clientids_chunk);
                    # Delete from the table being cleaned. These ClientIDs have
                    # no rows in `users`, so deleting from `users` would do nothing.
                    do_delete_from_table(PPC(shard => $shard), $table, where => {ClientID => $clientids_chunk});
                }
            }
        }
    }
}

$log->out('FINISH');
