#!/usr/bin/perl

=head1 DESCRIPTION

Скрипт для поиска и удаления "непривязанных" записей в БД,
то есть тех, у которых отсутствует запись о родительском объекте.

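Параметры:

    --table t   таблица (обязательно, можно указать несколько раз)
    --shard-id  шард (по умолчанию - все)
    --remove    удалить найденные записи
    --cascade   без --remove: пройти по дочерним записям найденных объектов, ничего не удаляя
    --uid, --cid, --pid, --bid, --mbid
                ограничить проверку заданными id (можно указать несколько раз)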

Пример:

    # найти непривязанные записи в таблице campaigns
    ./fix-lost-records.pl --table campaigns

    # удалить записи с заданными uid
    ./fix-lost-records.pl --shard-id 1 --table campaigns --uid=8739944 --remove


=cut

use Direct::Modern;
use my_inc '../..';

use List::Util qw/pairs any/;
use Yandex::DBTools;
use Yandex::DBShards;
use Yandex::Retry qw/relaxed_guard/;
use Time::HiRes qw/time/;

use ShardingTools;
use ScriptHelper;
use Settings;


# Tunables shared by the scanning subs below.
my (
    $SLEEP_COEF,         # passed as `times` to relaxed_guard(); overridable from the command line
    $CHUNK_SIZE,         # LIMIT applied to every scanning query
    $RANGE_LIMIT,        # initial width of the key range covered by one query in find_hung_records()
    $REQUEST_TIME_GOAL   # query duration (seconds) the key range width is tuned towards
) = (1, 100_000, 256_000, 0.3);

# Table descriptions (keys to check / provided key / child tables), built once at start-up.
our $TBL = _init_table_desc();


# Id filters from the command line: one array of ids per key known to
# _parent_table_hash() (--uid, --cid, --pid, --bid, --mbid).
my %limit;
extract_script_params(
    'remove!' => \our $NEED_REMOVE,
    'cascade!' => \our $NEED_CASCADE,
    'shard-id=i' => \my @shards,
    'table=s' => \my @tables_to_check,
    # The historical spelling of this option contains a Cyrillic "с" (U+0441)
    # in "coef", so the plain ASCII --sleep-coef was rejected.  The ASCII name
    # is now the primary one; the old spelling is kept as an alias for
    # backward compatibility.
    # NOTE(review): assumes extract_script_params() hands these specs to
    # Getopt::Long (alias syntax "name|alias=type") - confirm.
    'sleep-coef|sleep-сoef=f' => \$SLEEP_COEF,
    # \@{...} autovivifies an (initially empty) id list for every known key.
    ( map { ("$_=i" => \@{$limit{$_}}) } keys %{_parent_table_hash()} ),
);

die 'Nothing to check, please use --table'  if !@tables_to_check;
for my $table (@tables_to_check) {
    next if $TBL->{$table};
    die "Don't know how to process <$table>";
}

# No --shard-id given: process every shard.
@shards = ppc_shards()  if !@shards;
# True when at least one id filter was passed on the command line.
my $is_id_limited = any {@$_} values %limit;

$log->out('START');

# For every requested shard and table, run the check for each key declared
# for that table in $TBL.
SHARD:
for my $shard (@shards) {
    $log->msg_prefix("Shard #$shard");

    for my $table (@tables_to_check) {
        my $table_info = $TBL->{$table};
        for my $key (@{$table_info->{keys}}) {
            # A key is either a column name or a code ref with a custom check.
            # Custom checks have no entry in %limit (they apply id filters
            # themselves), and some column names (those without a parent table)
            # have none either.  Dereferencing the missing entry used to die
            # with "Can't use an undefined value as an ARRAY reference" as
            # soon as any id filter was given, hence the `// []` guard.
            my $is_custom_check = ref $key;
            if ($is_id_limited && !$is_custom_check && !@{ $limit{$key} // [] }) {
                $log->out("skipping $table: no $key defined");
                next;
            }

            $log->out("searching for hung records in $table");

            my $process_sub = $is_custom_check ? $key : \&find_hung_records;
            $process_sub->($shard, $table, $key);
        }
    }

    $log->msg_prefix(undef);
}


$log->out('FINISH');



# Handle one hung id found in $table on the given shard.
#
#   remove_hung_record($shard, $table, $key => $id, log_ext => $text);
#
# If the table provides its own key (per $TBL) and was matched by a different
# one, the rows are first resolved to their own ids and each of them is
# processed recursively.  Otherwise all child tables are processed with the
# same key/id, and finally the rows are deleted from $table - but only when
# --remove was given; without it the sub just walks the tree and logs.
# log_ext is a suffix appended to the log lines (used to show the chain of
# parent ids).
sub remove_hung_record {
    my ($shard, $table, $key => $id, %opt) = @_;

    my $suffix = $opt{log_ext} || '';
    my $desc = $TBL->{$table};
    my $own_key = $desc->{provides};

    # Matched by a foreign key: switch to the table's own ids and recurse.
    if ($own_key && $own_key ne $key) {
        my $own_ids = get_one_column_sql(PPC(shard => $shard), [
                "SELECT $own_key FROM $table",
                WHERE => {$key => $id},
            ]);

        my $nested_suffix = " for $key=$id$suffix";
        for my $own_id (@$own_ids) {
            $log->out("descending to child $own_key=$own_id$nested_suffix");
            remove_hung_record($shard, $table, $own_key => $own_id, log_ext => $nested_suffix);
        }

        return;
    }

    # Matched by the table's own key: clean up dependent tables first.
    if ($own_key) {
        for my $child (@{$desc->{children}}) {
            remove_hung_record($shard, $child, $key => $id, log_ext => $suffix);
        }
    }

    # Dry run: nothing is deleted.
    return if !$NEED_REMOVE;

    $log->out("deleting $key=$id in $table$suffix");
    do_delete_from_table(PPC(shard => $shard), $table,
        where => {$key => $id},
    );

    return;
}


# Scan $table on the given shard for rows whose $key has no matching row in
# the parent table (as given by _parent_table_hash()).
#
#   find_hung_records($shard, $table, $key);
#
# The key space from 1 up to max($key) of the parent table is scanned in
# ranges; the range width is adapted so that one query takes about
# $REQUEST_TIME_GOAL seconds.  When ids for $key were passed on the command
# line, a single query restricted to those ids is made instead.
# Every found id is logged; with --remove or --cascade it is also handed to
# remove_hung_record().  Without --remove a ready-to-run command line is
# logged at the end.
# Croaks if $key has no known parent table.
sub find_hung_records {
    my ($shard, $table, $key, %opt) = @_;

    my $ptable = _parent_table_hash()->{$key};
    croak "Don't know '$key' key"  if !$ptable;

    my $min_key = 1;
    # max() is NULL/undef for an empty parent table; treat it as 0 so the
    # loop condition below does not compare against undef.
    my $max_key = get_one_field_sql(PPC(shard => $shard), [
            "SELECT max($key) FROM $ptable"
        ]) // 0;

    my $range_limit = $RANGE_LIMIT;
    my $limit_ids = $limit{$key};

    my @found;
    while ($min_key < $max_key) {
        my $relax = relaxed_guard(times => $SLEEP_COEF);

        $log->out("scanning $table from $key=$min_key with range $range_limit");

        my $start_time = time();
        my $hung_ids = get_one_column_sql(PPC(shard => $shard), [
            "SELECT DISTINCT tt.$key
            FROM $table tt
            LEFT JOIN $ptable st USING($key)",
            WHERE => {
                "st.${key}__is_null" => 1,
                "${key}__ge" => $min_key,
                (@$limit_ids
                    ? ($key => $limit_ids)
                    : ("${key}__lt" => $min_key + $range_limit)
                ),
            },
            "ORDER BY $key",
            LIMIT => $CHUNK_SIZE,
        ]);

        # Where to continue from:
        #  - explicit id list: one query is enough, so jump past the end;
        #  - full chunk: the range may hold more rows, resume after the last id;
        #  - otherwise the whole range was covered.
        $min_key =
            @$limit_ids                 ? $max_key + 1 :
            @$hung_ids == $CHUNK_SIZE   ? $hung_ids->[-1] + 1
                                        : $min_key + $range_limit;

        my $time_spent = time() - $start_time;
        if ($time_spent > 2 * $REQUEST_TIME_GOAL) {
            # Keep the width a positive integer: plain halving would drift
            # into fractions (and towards zero), producing fractional key
            # bounds and a scan that practically never finishes.
            $range_limit = int($range_limit / 2) || 1;
        }
        elsif ($time_spent < 0.5 * $REQUEST_TIME_GOAL) {
            $range_limit *= 2;
        }

        for my $id (@$hung_ids) {
            $log->out("!! found hung records with $key=$id in $table");
            push @found, $id;
            remove_hung_record($shard, $table, $key => $id)  if $NEED_REMOVE || $NEED_CASCADE;
        }
    }

    if (@found && !$NEED_REMOVE) {
        my $id_params = join ' ' => map {"--$key=$_"} @found;
        $log->out("!! recommended action: $0 --shard-id $shard --table $table $id_params --remove ");
    }

    return;
}



# Custom check for the users table: find "zombie" users, i.e. rows of users
# (with client role 'client' or no clients row at all) on this shard for which
# get_shard_multi() reports no shard or a different one.
#
#   find_hung_users($shard, $table, $key);
#
# Users are read in chunks of $CHUNK_SIZE ordered by uid.  Ids passed via
# --uid restrict the scan.  Every zombie is logged; with --remove or
# --cascade it is also handed to remove_hung_record().
sub find_hung_users {
    my ($shard, $table, $key, %opt) = @_;

    # Same rule the driver applies to ordinary keys: when id filters were
    # given but none of them is a uid, there is nothing to check here.
    my $uid_limit = $limit{uid} // [];
    if (!@$uid_limit && scalar(grep { @$_ } values %limit)) {
        $log->out("skipping $table: no uid defined");
        return;
    }

    # The query below uses "uid > $min_uid", so start from 0: starting from 1
    # silently skipped uid=1.
    my $min_uid = 0;
    while (1) {
        my $relax = relaxed_guard(times => $SLEEP_COEF);

        $log->out("requesting chunk from uid=$min_uid");
        my $users = get_all_sql(PPC(shard => $shard), [
                "SELECT uid, login
                FROM users u
                LEFT JOIN clients cl USING(ClientID)",
                WHERE => {
                    uid__gt => $min_uid,
                    (@$uid_limit ? (uid => $uid_limit) : ()),
                    _OR => {
                        role => 'client',
                        role__is_null => 1,
                    },
                },
                'ORDER BY uid',
                LIMIT => $CHUNK_SIZE,
            ]);
        last if !@$users;
        $min_uid = $users->[-1]->{uid};

        my @uids = map {$_->{uid}} @$users;
        my $shard_by_uid = get_shard_multi(uid => \@uids);

        my @zombie_users = grep {!$shard_by_uid->{$_->{uid}} || $shard_by_uid->{$_->{uid}} != $shard} @$users;
        next if !@zombie_users;

        for my $user (@zombie_users) {
            my $uid = $user->{uid};
            # uid goes through %s instead of being interpolated into the
            # format string.
            $log->out(sprintf "!! found zombie user: uid=%s %s, real shard %s" =>
                $uid,
                $user->{login} || "<empty login>",
                $shard_by_uid->{$uid} || "<unknown>",
            );

            # Consistent with find_hung_records(): a plain report run does
            # not walk the child tables.
            remove_hung_record($shard, 'users', uid => $uid)  if $NEED_REMOVE || $NEED_CASCADE;
        }
    }

    return;
}



# Returns a hash ref mapping a key column to the table that owns it
# (the "parent" table for every other table referencing that key).
# The same hash ref is returned on every call.
sub _parent_table_hash {
    state $table_by_key = { qw(
        uid     users
        cid     campaigns
        pid     phrases
        bid     banners
        mbid    mediaplan_banners
    ) };

    return $table_by_key;
}

# Builds the description of every table the script can check.
#
# Returns a hash ref: table name => {
#     keys     => [ keys to check the table by; a key is either a column name
#                   or a code ref implementing a custom check ],
#     provides => key column owned by this table (only for tables listed in
#                 _parent_table_hash()),
#     children => [ tables declared below with the key this table owns ],
# }
sub _init_table_desc {
    my $parent_table_of = _parent_table_hash();
    my %key_provided_by = reverse %$parent_table_of;

    # (table => key) pairs; a table may be listed several times with different keys.
    # NOTE(review): 'tag_id' has no entry in _parent_table_hash(), so
    # find_hung_records() would croak for tag_group - confirm whether a parent
    # mapping is missing.
    my @checks = pairs(
        users => \&find_hung_users,
        users_options => 'uid',

        campaigns => 'uid',
        camp_options => 'cid',
        campaigns_mobile_content => 'cid',
        campaigns_performance => 'cid',

        phrases => 'cid',
        adgroups_mobile_content => 'pid',
        adgroups_performance => 'pid',
        adgroups_dynamic => 'pid',

        banners => 'cid',
        banners => 'pid',
        banners_mobile_content => 'bid',
        banners_performance => 'bid',

        user_campaigns_favorite => 'cid',
        camp_metrika_counters => 'cid',
        metrika_counters => 'cid',
        warn_pay => 'cid',
        camp_payments_info => 'cid',
        bs_export_specials => 'cid',

        mediaplan_banners => 'cid',
        mediaplan_bids => 'cid',
        mediaplan_banners_original => 'cid',
        autobudget_forecast => 'cid',
        mediaplan_bids_retargeting => 'mbid',

        group_params => 'pid',
        bids_retargeting => 'pid',
        bids_dynamic => 'pid',
        dynamic_conditions => 'pid',
        bids_performance => 'pid',
        bids_href_params => 'cid',
        bids_manual_prices => 'cid',
        bids_arc => 'cid',
        bids => 'cid',
        bids_base => 'cid',

        banner_images => 'bid',
        banners_additions => 'bid',
        banner_display_hrefs => 'bid',
        images => 'cid',
        images => 'bid',
        banner_resources => 'bid',
        banners_minus_geo => 'bid',

        tag_campaign_list => 'cid',
        tag_group => 'tag_id',
    );

    my %desc_of;
    for my $check (@checks) {
        my ($table, $key) = @$check;

        my $desc = $desc_of{$table} //= {};
        push @{ $desc->{keys} }, $key;

        # The table owns a key of its own.
        my $own_key = $key_provided_by{$table};
        $desc->{provides} = $own_key  if $own_key;

        # Register the table as a child of the table owning $key
        # (code-ref keys never match a parent).
        my $parent = $parent_table_of->{$key};
        push @{ $desc_of{$parent}->{children} }, $table  if $parent;
    }

    return \%desc_of;
}


