#!/usr/bin/perl

use my_inc '..';

=head1 DEPLOY

# approved by zhur
# .migr
{
  type => 'script',
  when => 'after',
  time_estimate => "неделя",
  comment => "при перезапуске можно указать --from=NNN, 
номер взяв из строки вида 'start process chunk NNN/10000' из конца лога"
}

=cut

use warnings;
use strict;
use utf8;

use Yandex::DBTools;
use Yandex::Retry qw/relaxed/;
use Yandex::ListUtils qw/chunks/;
use List::MoreUtils qw/uniq/;

use Settings;
use ScriptHelper;

# One-off cleanup: delete rows from source_for_url (MONITOR database) that have
# no matching row in antispam_queue (joined on href_hash).
#
# The href_hash value space [0, max(href_hash)] is split into $CHUNKS_NUM
# ranges which are scanned one after another, so that a single query never has
# to look at the whole table.
#
# Script parameters:
#   --chunks-num=N  number of ranges to split the href_hash space into
#                   (default 10_000)
#   --from=N        index of the first range to process (default 0); used to
#                   resume an interrupted run, see the "start process chunk"
#                   log line below
$log->out('START');

my $CHUNKS_NUM = 10_000;
my $FROM = 0;
extract_script_params(
    "chunks-num=i" => \$CHUNKS_NUM,
    "from=i" => \$FROM,
    );

my $max_hash = get_one_field_sql(MONITOR, "SELECT max(href_hash) FROM source_for_url");

if (!defined $max_hash) {
    # max() yields NULL when there is no non-NULL href_hash at all; scanning
    # ranges would then only produce "uninitialized value" warnings and
    # thousands of useless queries.
    $log->out("no href_hash values in source_for_url, nothing to clean up");
}
else {
    for my $chunk ($FROM .. $CHUNKS_NUM - 1) {
        # NB: the format of this message is referenced by the restart
        # instructions in the DEPLOY section - do not change it.
        $log->out("start process chunk $chunk/$CHUNKS_NUM");

        my $hash_step = $max_hash / $CHUNKS_NUM;
        my $hash_from = int($chunk * $hash_step);
        # The last range must end exactly at $max_hash: recomputing it as
        # int($CHUNKS_NUM * $hash_step) can fall short of $max_hash because of
        # floating-point rounding followed by truncation, which would leave
        # the rows with the largest hashes unprocessed.
        my $hash_to = $chunk == $CHUNKS_NUM - 1
            ? $max_hash
            : int(($chunk + 1) * $hash_step);

        # BETWEEN is inclusive on both ends, so neighbouring ranges share a
        # boundary value; that is harmless here because the matching rows are
        # already deleted by the time the next range is scanned.
        my $bad_data = get_all_sql(MONITOR, "SELECT s.*
                                               FROM source_for_url s
                                                    LEFT JOIN antispam_queue a using(href_hash)
                                              WHERE s.href_hash between ? and ?
                                                AND a.href_hash is null
                             ", $hash_from, $hash_to);
        $log->out("selected ".scalar(@$bad_data)." bad rows");

        # Delete in batches of 100 rows; every batch is logged in full before
        # it is removed so the deleted data can be recovered from the log.
        for my $bad_chunk (chunks $bad_data, 100) {
            $log->out({to_delete => $bad_chunk});
            my @hashes_to_delete = uniq map { $_->{href_hash} } @$bad_chunk;
            # NOTE(review): `relaxed` comes from Yandex::Retry; presumably it
            # throttles the deletes to limit DB load - confirm.
            relaxed times => 2, sub {
                my $deleted = do_delete_from_table(MONITOR, "source_for_url", where => {href_hash => \@hashes_to_delete});
                $log->out("deleted $deleted rows");
            };
        }
    }
}

$log->out('FINISH');

