#!/usr/bin/perl

=head1 DEPLOY

# .migr
{
  tasks => [
    {
      type => 'script',
      when => 'after',
      time_estimate => 'примерно 20-30 минут (измерил время исправления 10_000 баннеров и экстраполировал на общее количество баннеров с некорректными domain-ами)',
      comment => 'запускать по  просьбе от voronov@; убедиться перед запуском, что таблица //home/direct/tmp/voronov/mobile_content_banners_to_fix существует на кластере banach',
    }
  ],
  approved_by => 'hrustyashko'
}

=cut


# https://yql.yandex-team.ru/Operations/WowkHCK7Yo_P3FU_r-nZfulkrltTw_botvWFhmuil3g=


use my_inc '..';
use Direct::Modern;

use BS::ResyncQueue qw/bs_resync/;
use List::MoreUtils qw/uniq/;
use MirrorsTools;
use RedirectCheckQueue;
use ScriptHelper;
use Settings;
use Yandex::IDN qw/idn_to_ascii idn_to_unicode/;
use Yandex::DBShards;
use Yandex::DBTools;
use Yandex::HashUtils qw/hash_cut/;
use Yandex::Retry qw/relaxed_guard/;
use Yandex::YT::Table;

# Use JSON as the default format for Yandex::YT streaming.
$Yandex::YT::Streaming::DEFAULT_FORMAT = 'json';

# Script parameters; every one of them can be overridden from the command
# line (see extract_script_params below).
my $LIMIT      = 0;    # stop after this many banners were updated; 0 means "no limit"
my $DRYRUN     = 0;    # when set, banners are not updated and nothing is queued for BS resync / redirect check
my $YT_CLUSTER = 'banach';
my $CHUNK_SIZE = 1000; # number of table rows collected before a chunk is processed
my $PRIORITY   = $BS::ResyncQueue::PRIORITY_RESEND_DOMAINS_BS;
my $TABLE      = '//home/direct/tmp/voronov/mobile_content_banners_to_fix';

$log->out('START');

extract_script_params(
    'limit=i'     => \$LIMIT,
    'dry'         => \$DRYRUN,
    'cluster=s'   => \$YT_CLUSTER,
    'table=s'     => \$TABLE,
    'chunksize=i' => \$CHUNK_SIZE,
    'priority=i'  => \$PRIORITY,
);

# State shared with the subs below: running total of updated banners and the
# flag that tells the reading loop to stop.
my $TOTAL_UPDATED_BANNERS = 0;
my $IS_LIMIT_REACHED      = 0;

if ($LIMIT) {
    # The limit is compared against the number of updated banners.
    $log->out( sprintf( 'limit on updated banners amount is given - %s' => $LIMIT ) );
}

Tools::force_set_yt_environment($YT_CLUSTER);

my $table = Yandex::YT::Table->new($TABLE);
unless ($table->exists()) {
    $log->die("table $TABLE doesn't exist");
}

my $mirror = MirrorsTools->new(
    dont_load_file => 1,
    use_db => 1,
);

# Read the table row by row, accumulate banner ids and hand them over in
# chunks of $CHUNK_SIZE.
my @bids = ();
my $reader = $table->reader();
while ( my $r = $reader->next() ) {

    push @bids, { bid => $r->{bid} };

    if ( @bids >= $CHUNK_SIZE ) {
        proccess_bids( \@bids );
        @bids = ();
    }

    last if $IS_LIMIT_REACHED;
}

# Process the last, incomplete chunk (if any).
if ( @bids ) {
    proccess_bids( \@bids );
}

# NOTE: 'FINISH' is intentionally not logged here - it is logged once at the
# very end of the script, after the totals line.

# Processes one chunk of banner ids.
#
# Parameters:
#   $bids - arrayref of hashes { bid => <banner id> }
#
# The ids are split by shard with foreach_shard; for every shard the banners
# are loaded, the domains are recalculated and written back, filter domains
# are copied, and the banners are queued for BS resync and redirect checks.
# Nothing is done for the remaining shards once $IS_LIMIT_REACHED is set.
sub proccess_bids {
    my $bids = shift;

    foreach_shard bid => $bids, with_undef_shard => 1, sub {
        my ( $shard, $chunk ) = @_;

        return if $IS_LIMIT_REACHED;

        # Named differently from the file-level @bids to avoid shadowing it.
        my @shard_bids = map { $_->{bid} } @$chunk;

        # with_undef_shard => 1: ids whose shard could not be resolved are
        # passed here with a false $shard; they are only logged.
        if (!$shard) {
            $log->out( sprintf('can\'t guess shard for bids %s', join(', ' => @shard_bids) ) );
            return;
        }

        $log->out( sprintf('shard #%s, %s bids', $shard, scalar(@shard_bids) ) );

        # Guard object from Yandex::Retry, kept alive until the end of the
        # callback; presumably throttles the processing - TODO confirm.
        my $rg = relaxed_guard times => 1;

        my $banners = get_banners_info_from_db( $shard, \@shard_bids );

        my $banners_to_update = calc_and_check_domains( $banners );

        update_domains_for_banners( $shard, $banners_to_update );

        copy_filter_domain_to_shard( $shard, $banners_to_update );

        resync_with_bs( $banners_to_update );

        check_redirects( $banners_to_update );
    };
}

# Final summary: $TOTAL_UPDATED_BANNERS counts updated banners (not adgroups).
$log->out( sprintf( 'total %s banners updated' => $TOTAL_UPDATED_BANNERS ) );

$log->out('FINISH');


# Loads the data needed to fix domains for the given banners.
#
# Parameters:
#   $shard - shard number to query
#   $bids  - arrayref of banner ids
#
# Returns whatever get_all_sql returns for the query; each row carries bid,
# href, domain, archivedBanner, LastChange, cid and archivedCampaign.
# Only banners with banner_type = 'mobile_content' are selected.
sub get_banners_info_from_db {
    my ($shard, $bids) = @_;

    my %condition = (
        'b.bid'         => $bids,
        'b.banner_type' => 'mobile_content',
    );

    my $query =
        'select
            b.bid, b.href, b.domain, b.statusArch as archivedBanner,
            b.LastChange, c.cid, c.archived as archivedCampaign
        from
            banners b
                join phrases p using (pid)
                join campaigns c on (p.cid = c.cid)';

    # Returned directly so that the caller's context is passed through.
    return get_all_sql( PPC(shard => $shard), [ $query, where => \%condition ] );
}

# Recalculates the domain of every banner from its href and selects the
# banners whose stored domain differs.
#
# Parameters:
#   $banners - arrayref of banner hashes (bid, href, domain, ...)
#
# Returns an arrayref with the banners that need an update; each of them gets
# two extra keys:
#   correct_domain - the recalculated domain (converted with idn_to_unicode)
#   check_redirect - second value returned by
#                    RedirectCheckQueue::domain_need_check_redirect
#
# Banners for which no domain can be calculated are logged and skipped, so
# that an existing domain is never replaced with an empty value.
sub calc_and_check_domains {
    my ($banners) = @_;

    $log->out( sprintf('check domains for %s banners', scalar(@$banners)) );

    my @to_update;
    for my $banner ( @$banners ) {

        my ($domain_target, $domain_check_redirect) = RedirectCheckQueue::domain_need_check_redirect({href => $banner->{href}});

        if ( !defined $domain_target || $domain_target eq '' ) {
            $log->out( sprintf('banner #%s: can\'t calculate domain from href, skipped', $banner->{bid}) );
            next;
        }

        $domain_target = idn_to_unicode($domain_target);

        # The stored domain may be NULL; treat it as an empty string to avoid
        # an "uninitialized value" warning in the comparison.
        if ( $domain_target eq ( $banner->{domain} // '' ) ) {
            $log->out( sprintf('banner #%s has correct domain [%s] already', $banner->{bid}, $domain_target) );
            next;
        }

        $banner->{correct_domain} = $domain_target;
        $banner->{check_redirect} = $domain_check_redirect;

        push @to_update, $banner;
    }

    return \@to_update;
}

# Writes the recalculated domains to the banners table of the given shard.
#
# Parameters:
#   $shard   - shard number
#   $banners - arrayref of banner hashes with bid, href, correct_domain and
#              LastChange
#
# Side effects: increases $TOTAL_UPDATED_BANNERS by the number of rows
# reported by do_mass_update_sql and sets $IS_LIMIT_REACHED once the total
# reaches $LIMIT. In dry-run mode nothing is written and nothing is counted.
sub update_domains_for_banners {
    my ( $shard, $banners ) = @_;

    my $size = scalar @$banners;

    return unless $size;

    $log->out(['update domains for '. $size .' banners', [ map { hash_cut($_, qw/bid href domain correct_domain LastChange/) } @$banners ]]);

    # LastChange is written back with the value that was read from the DB
    # (presumably to keep the column from being refreshed - TODO confirm).
    my %bid_to_correct_domain = map { $_->{bid} => { domain => $_->{correct_domain}, LastChange => $_->{LastChange} } } @$banners;
    my %bid_to_old_href       = map { $_->{bid} => $_->{href} } @$banners;

    my $updated_banners = 0;
    if ( !$DRYRUN ) {
        # The extra condition compares href with the value read earlier for
        # the same bid, so a banner whose href has changed in the meantime is
        # presumably left untouched (depends on sql_case semantics - TODO confirm).
        $updated_banners = do_mass_update_sql(PPC(shard => $shard), 'banners', 'bid', \%bid_to_correct_domain,
            where => { href__dont_quote => sql_case( 'bid', \%bid_to_old_href, default__dont_quote => sql_quote_identifier('href') ) });

        # Only meaningful for a real run: in dry-run mode zero rows are
        # updated by design and the mismatch message would be just noise.
        if ( $size != $updated_banners ) {
            $log->out( sprintf( 'number of updated rows %s not equal to number of rows planned for update %s', $updated_banners, $size ) );
        }
    }

    $TOTAL_UPDATED_BANNERS += $updated_banners;

    # ">=": the limit is reached as soon as the total equals it; with ">" one
    # more chunk would be processed after hitting the limit exactly.
    if ( $LIMIT && $TOTAL_UPDATED_BANNERS >= $LIMIT ) {
        $IS_LIMIT_REACHED = 1;
    }
}

# Stores filter domains for the new banner domains in the filter_domain table
# of the given shard.
#
# Parameters:
#   $shard   - shard number
#   $banners - arrayref of banner hashes with the correct_domain key
#
# For every distinct correct_domain the filter domain is taken from
# $mirror->domain_filter (called with the idn_to_ascii form of the domain);
# pairs where both values are equal are not stored.
#
# Returns the result of do_mass_insert_sql, or nothing when there is nothing
# to insert or when the script runs in dry-run mode.
sub copy_filter_domain_to_shard {
    my ( $shard, $banners ) = @_;

    return unless scalar @$banners;

    my @domains = uniq map { $_->{correct_domain} } @$banners;

    my @to_insert;
    foreach my $domain ( @domains ) {
        my $filter_domain = $mirror->domain_filter( idn_to_ascii( $domain ) );
        next if $domain eq $filter_domain;
        push @to_insert, [$domain, $filter_domain];
    }

    # Do not issue an insert without rows.
    return unless @to_insert;

    $log->out(['copy filter domains for '. scalar(@to_insert) .' domains', \@to_insert]);

    # Dry run must not modify the database, same as the other write steps.
    return if $DRYRUN;

    return do_mass_insert_sql(PPC(shard => $shard),
            'insert into filter_domain (domain, filter_domain) values %s
            on duplicate key update filter_domain = values(filter_domain)', \@to_insert);
}

# Queues the given banners for resynchronization with BS.
#
# Parameters:
#   $banners - arrayref of banner hashes with cid, bid, archivedBanner and
#              archivedCampaign
#
# Banners that are archived themselves or belong to an archived campaign are
# left out. Every queued item gets the priority from $PRIORITY. In dry-run
# mode the items are only logged.
sub resync_with_bs {
    my ( $banners ) = @_;

    return if !@$banners;

    my @to_resync;
    for my $banner ( @$banners ) {
        next if $banner->{archivedBanner}   eq 'Yes'
             || $banner->{archivedCampaign} eq 'Yes';

        push @to_resync, {
            cid      => $banner->{cid},
            bid      => $banner->{bid},
            priority => $PRIORITY,
        };
    }

    my $size = scalar @to_resync;
    return if !$size;

    $log->out(['resync with bs '. $size .' banners', \@to_resync]);

    bs_resync( \@to_resync ) if !$DRYRUN;
}

# Queues banners for a redirect check.
#
# Parameters:
#   $banners - arrayref of banner hashes with bid and check_redirect
#
# Only banners with a true check_redirect value are queued, each as
# { bid => ... }. In dry-run mode the items are only logged.
sub check_redirects {
    my ( $banners ) = @_;

    return if !@$banners;

    my @to_check;
    for my $banner ( @$banners ) {
        next if !$banner->{check_redirect};
        push @to_check, { bid => $banner->{bid} };
    }

    my $size = scalar @to_check;
    return if !$size;

    $log->out(['check redirects for '. $size .' banners', \@to_check]);

    RedirectCheckQueue::push_banners( \@to_check ) if !$DRYRUN;
}