#!/usr/bin/perl

=head1 METADATA

<crontab>
    time: * * * * *
    package: scripts-switchman
    <switchman>
        group: scripts-resharding
        <leases>
            mem: 1000
        </leases>
    </switchman>
</crontab>

<crontab>
    time: * * * * *
    flock: 1
    <switchman>
        group: scripts-test
    </switchman>
    package: conf-test-scripts
</crontab>

<juggler>
    host:   checks_auto.direct.yandex.ru
    ttl: 2h
    tag: direct_group_internal_systems
    <notification>
        template: on_status_change
        status: OK
        status: CRIT
        method: telegram
        login: DISMonitoring
    </notification>
</juggler>

=cut


=head1 NAME

    ppcReSharder.pl

=head1 DESCRIPTION

    Выполнение заявок на перенос клиентов из ppcdict.reshard_queue

=cut


use strict;
use warnings;

use JSON;
use Carp qw/croak/;

use Yandex::DateTime qw/now/;
use Yandex::DBTools;
use Yandex::DBShards;

use my_inc "..";

use Settings;
use ScriptHelper get_file_lock => undef;

use Direct::ReShard;
use Direct::ReShard::Process;
use ShardingTools;
use LockTools;

# Command-line options. Client ids are collected into @CLIENT_IDS; the other
# options overwrite $BLOCK_SLEEP, $BLOCK_SHARD_SLEEP and $MAX_RUN_TIME.
# NOTE(review): those three variables are not declared in this part of the
# file -- presumably they are imported from one of the modules above.
my @CLIENT_IDS;
extract_script_params(
    'client-id=i'         => \@CLIENT_IDS,
    'block-sleep=i'       => \$BLOCK_SLEEP,
    'block-shard-sleep=i' => \$BLOCK_SHARD_SLEEP,
    'max-run-time=i'      => \$MAX_RUN_TIME,
);

# client-id is only passed when the script is started from a beta environment;
# in that case the file lock is not needed.
unless (@CLIENT_IDS) {
    get_file_lock(2 * $MAX_RUN_TIME, get_script_name());
}

main();

# Main loop of the script.
#
# On every iteration: leave if smart_check_stop_file() reports a reason,
# otherwise fetch one task via get_reshard_task() and advance it with
# process_reshard_task(). When no task is available the loop sleeps for
# $ITERATION_INTERVAL seconds; if the script was started for explicit client
# ids and none of them has an unfinished task, the loop ends. The loop also
# ends once $MAX_RUN_TIME seconds have passed since the start.
sub main {
    $log->out("START");

    # Mark every SQL query issued through this script with a "resharding" tag;
    # the plan is to have ESS logic take this tag into account.
    my $prev_trace_vars = $Yandex::DBTools::TRACE_COMMENT_VARS;
    local $Yandex::DBTools::TRACE_COMMENT_VARS = sub {
        my $base_vars = $prev_trace_vars->();
        return { %$base_vars, resharding => 1 };
    };

    my $start_time = time;

    ITERATION:
    while (1) {
        my $reason = smart_check_stop_file();
        if ($reason) {
            $log->out("$reason! Exiting.");
            exit 0;
        }

        my $task = get_reshard_task();
        if ($task) {
            process_reshard_task($task);
        } else {
            if (@CLIENT_IDS) {
                # Explicit client list: stop as soon as none of these clients
                # has a task that is neither done nor failed.
                my %unfinished_cond = (
                    status__not_in => ['done', 'error'],
                    ClientID       => \@CLIENT_IDS,
                );
                my $active_tasks_cnt = get_one_field_sql(
                    PPCDICT,
                    ["SELECT count(*) from reshard_queue", WHERE => \%unfinished_cond],
                );
                unless ($active_tasks_cnt) {
                    $log->out("No tasks for specified clients, exit");
                    last ITERATION;
                }
            }
            $log->out("no available task");
            sleep $ITERATION_INTERVAL;
        }

        # Called once per completed iteration.
        # NOTE(review): presumably reports an OK status to monitoring -- confirm.
        juggler_ok();

        if (time - $start_time >= $MAX_RUN_TIME) {
            $log->out("run time: ".(time - $start_time).", exit");
            last ITERATION;
        }
    }

    $log->out("FINISH");
}

# Picks the next task from ppcdict.reshard_queue that may be advanced now.
#
# Only tasks whose wanted_start_time has already come are considered (and
# only those of @CLIENT_IDS when that list is not empty). Which statuses are
# eligible depends on the current load:
#   * 'locked-shard', 'after-locked-shard', 'moved' are always eligible;
#   * 'locked', 'after-locked' are eligible while the summed dump_ela of the
#     tasks in 'locked-shard'/'after-locked-shard' does not exceed
#     $BLOCK_SHARD_SLEEP;
#   * 'new', 'dumped' are eligible while fewer than $MAX_STARTED_TASKS tasks
#     are started and their summed dump_ela does not exceed
#     $BLOCK_SLEEP + $BLOCK_SHARD_SLEEP.
# In addition a 'locked' / 'locked-shard' task is taken only after
# $BLOCK_SLEEP / $BLOCK_SHARD_SLEEP seconds have passed since last_step_time.
#
# Returns a hashref (id, status, ClientID, old_shard, new_shard, lock_data)
# with lock_data decoded from JSON when it is defined, or a false value when
# nothing is available.
sub get_reshard_task {

    my %base_cond = (wanted_start_time__le__dont_quote => 'now()');
    if (@CLIENT_IDS) {
        $base_cond{ClientID} = \@CLIENT_IDS;
    }

    # Returns (count(*), sum(dump_ela)) over the tasks that match %base_cond
    # plus the extra status condition passed in.
    my $queue_stats = sub {
        my (%status_cond) = @_;
        return get_one_line_array_sql(PPCDICT, ["
                            SELECT count(*), sum(dump_ela)
                              FROM reshard_queue",
                             WHERE => { %base_cond, %status_cond },
                      ]);
    };

    # total number and summed dump_ela of the tasks that are already started
    my ($started_cnt, $started_ela) = $queue_stats->(
        status__not_in => ['new', 'done', 'error'],
    );
    # summed dump_ela of the tasks whose shard is currently locked
    my (undef, $shard_locked_ela) = $queue_stats->(
        status => ['locked-shard', 'after-locked-shard'],
    );

    my @eligible_statuses = ('locked-shard', 'after-locked-shard', 'moved');
    unless (($shard_locked_ela // 0) > $BLOCK_SHARD_SLEEP) {
        push @eligible_statuses, qw/locked after-locked/;
    }
    if ($started_cnt < $MAX_STARTED_TASKS && ($started_ela // 0) <= $BLOCK_SLEEP + $BLOCK_SHARD_SLEEP) {
        push @eligible_statuses, qw/new dumped/;
    }

    # Enforces the pause after the 'locked' and 'locked-shard' steps.
    my $step_delay_sql = "(status = 'locked' and last_step_time <= now() - interval $BLOCK_SLEEP second
                        or status = 'locked-shard' and last_step_time <= now() - interval $BLOCK_SHARD_SLEEP second
                        or status not in ('locked', 'locked-shard')
                       )";

    my %task_cond = (
        %base_cond,
        status => \@eligible_statuses,
        _TEXT  => $step_delay_sql,
    );

    # The FIND_IN_SET ordering puts tasks that are closest to completion
    # first ('moved' ... 'new'); ties go to the earliest wanted_start_time,
    # then to the smallest id.
    my $next_task = get_one_line_sql(PPCDICT, ["
                            SELECT id, status, ClientID, old_shard, new_shard, lock_data
                              FROM reshard_queue",
                             WHERE => \%task_cond, "
                             ORDER BY FIND_IN_SET(status, 'moved,after-locked-shard,locked-shard,after-locked,locked,dumped,new')
                                    , wanted_start_time, id
                            "]);

    if ($next_task && defined $next_task->{lock_data}) {
        $next_task->{lock_data} = from_json($next_task->{lock_data});
    }
    return $next_task;
}

# Performs exactly one step of the resharding state machine for a task and
# stores the new state through update_reshard_task().
#
# Parameters:
#   $task - hashref as returned by get_reshard_task(); the keys id, status,
#           ClientID, old_shard, new_shard and lock_data are read.
#
# Steps, by current status:
#   new                -> dry-run move (simulate + log_data) to dump the
#                         client data, save lock_data          -> dumped
#   dumped             -> lock_client()                        -> locked
#   locked             -> (status change only)                 -> after-locked
#   after-locked       -> save_shard(shard => 0)               -> locked-shard
#   locked-shard       -> (status change only)                 -> after-locked-shard
#   after-locked-shard -> move_client_data()                   -> moved
#   moved              -> unlock_client() on the new shard     -> done
#
# For 'new' and 'dumped' tasks the shards are validated first; on a validation
# failure the errors are logged, the task is switched to 'error' and the sub
# returns without doing anything else. Any other status is left untouched.
sub process_reshard_task {
    my ($task) = @_;

    local $log->{msg_prefix} = "[task_id=$task->{id},ClientID=$task->{ClientID}]";
    $log->out("get task", $task);

    my ($ClientID, $old_shard, $new_shard) = @{$task}{qw/ClientID old_shard new_shard/};
    if ($task->{status} =~ /^(?:new|dumped)$/) {
        my $shard = get_shard(ClientID => $ClientID);
        my @errs;

        # Compare against the current shard only when it is known: with an
        # undefined $shard the numeric comparisons would emit "uninitialized
        # value" warnings and add a misleading "Incorrect old_shard" error on
        # top of the real one.
        if (!$shard) {
            push @errs, "Unknown shard for Client";
        } elsif ($old_shard != $shard) {
            push @errs, "Incorrect old_shard: $old_shard != $shard";
        }

        # The target shard must differ from the current one and must be one
        # of the shards returned by ppc_shards().
        my $new_shard_is_current = $shard && $new_shard == $shard;
        if ($new_shard_is_current || !grep { $_ == $new_shard } ppc_shards()) {
            push @errs, "Incorrect new_shard: $new_shard";
        }

        if (@errs) {
            $log->out(\@errs);
            update_reshard_task($task, status => 'error');
            return;
        }
    }

    # NOTE(review): presumably a scope guard for a named SQL lock (the last
    # argument looks like a timeout) that is released when $lock_guard goes
    # out of scope at the end of this sub -- confirm against LockTools.
    my $lock_guard = sql_lock_guard(PPCDICT, "RESHARD_CLIENT_$ClientID", 5);
    my $resharder = Direct::ReShard->create(
        simulate => 0,
        log_data => 0,
        validate_db_schema => $VALIDATE_DB_SCHEMA,
        validate_db_cols => $VALIDATE_DB_COLS,
        orig_shard => ($VALIDATE_DB_SCHEMA ? undef : $old_shard),
        transaction_per_table => $TRANSACTION_PER_TABLE,
        );

    if ($task->{status} eq 'new') {
        $log->out("dump client data, heat the database");
        update_reshard_task($task, start_time => now());

        # Dry run: simulate + log_data are switched on only for this call and
        # switched back off right after it.
        $resharder->simulate(1);
        $resharder->log_data(1);
        $resharder->move_client_data($ClientID, $old_shard, $new_shard);
        $resharder->simulate(0);
        $resharder->log_data(0);

        my $lock_data = $resharder->get_client_lock_data($ClientID, $old_shard);

        update_reshard_task($task, lock_data => $lock_data, status => 'dumped');

    } elsif ($task->{status} eq 'dumped') {
        $log->out("lock client");
        $resharder->lock_client($ClientID, $old_shard, $task->{lock_data});
        update_reshard_task($task, status => 'locked');

    } elsif ($task->{status} eq 'locked') {
        # No work here: get_reshard_task() hands out 'locked' tasks only after
        # the configured pause, so this step just records that it has passed.
        update_reshard_task($task, status => 'after-locked');

    } elsif ($task->{status} eq 'after-locked') {
        $log->out("lock client shard");
        save_shard(ClientID => $task->{ClientID}, shard => 0);
        update_reshard_task($task, status => 'locked-shard');

    } elsif ($task->{status} eq 'locked-shard') {
        # Same as 'locked': only marks the end of the pause after the shard lock.
        update_reshard_task($task, status => 'after-locked-shard');

    } elsif ($task->{status} eq 'after-locked-shard') {
        $log->out("move client data");
        $resharder->move_client_data($ClientID, $old_shard, $new_shard);
        update_reshard_task($task, status => 'moved');

    } elsif ($task->{status} eq 'moved') {
        $log->out("unlock client");
        $resharder->unlock_client($ClientID, $new_shard, $task->{lock_data});
        update_reshard_task($task, status => 'done', done_time => now());
    }
}

# Updates one row of ppcdict.reshard_queue.
#
# Parameters:
#   $task - hashref of the task; only $task->{id} is used (row selector).
#   %kv   - columns to set. Allowed keys: status, lock_data, start_time,
#           done_time. A defined lock_data value is serialized to JSON.
#
# last_step_time is always set to NOW() together with the given columns.
#
# Croaks on any key outside the allowed list; calls $log->die when the update
# reports no affected rows. Returns 1 on success.
sub update_reshard_task {
    my ($task, %kv) = @_;

    $log->out("update task:" => \%kv);

    # \A...\z instead of ^...$ so that a key with a trailing newline is
    # rejected too; keys are sorted to keep the error message deterministic.
    my @wrong_keys = grep { !/\A(?:status|lock_data|start_time|done_time)\z/ } sort keys %kv;
    if (@wrong_keys) {
        croak "Unsupported keys: ".join(',', @wrong_keys);
    }

    my %vals = %kv;
    $vals{lock_data} = to_json($vals{lock_data}) if defined $vals{lock_data};
    $vals{last_step_time__dont_quote} = 'NOW()';

    # NOTE(review): the result is treated as "number of rows updated"; if the
    # driver reports changed rather than matched rows, an update that changes
    # nothing would also land in the error branch -- confirm.
    my $updated = do_update_table(PPCDICT, "reshard_queue", \%vals, where => {id => $task->{id}});
    unless ($updated > 0) {
        $log->die("Can't update status for task $task->{id}: no such task");
    }
    return 1;
}

