#!/usr/bin/perl

=head1 DEPLOY

# approved by pankovpv
# .migr
{
  type => 'script',
  when => 'after',
  time_estimate => '10-20 минут',
  comment => 'восстанавливаем данные по баннерам в monitor.source_for_urls, которые туда не попали из-за переполнения bid',
}

=cut

use Direct::Modern;

use JSON;
use List::MoreUtils qw(uniq);

use Yandex::DBTools;

use my_inc '..';

use Antispam;
use HashingTools;
use ScriptHelper;
use ShardingTools;
use Sitelinks;
use Settings;

# Starting cursor for the scan: 2**31 - 1, the largest signed 32-bit integer.
# Only banners with a strictly greater bid are fetched (the DEPLOY note above
# attributes the missing data to a bid overflow).
my $LAST_BID_BEFORE_OVERFLOW = 2**31 - 1;
# Lower and upper bounds for the b.LastChange filter used in _get_banners.
# NOTE(review): presumably the day the overflow started and the day the table
# was altered - confirm.
my $OVERFLOW_BEGIN_AT = '20160507';
my $TABLE_ALTERED_AT = '20160516';
# Maximum number of banners fetched (and processed) per batch.
my $BANNERS_LIMIT = 10_000;

$log->out('START');

# Walk every shard, paging through the affected banners in bid order and
# passing each batch to add_urls_if_needed.
for my $shard (ppc_shards()) {
    # Held in a lexical so the log prefix stays in effect for this shard only.
    my $msg_prefix_guard = $log->msg_prefix_guard("[shard $shard]");

    my $min_bid = $LAST_BID_BEFORE_OVERFLOW;

    BATCH:
    while (1) {
        $log->out("Fetching banners from bid $min_bid");
        my $banners = _get_banners($shard, $min_bid);
        my $banners_cnt = scalar(@$banners);
        $log->out("Got $banners_cnt banners to possibly save");

        # Nothing fetched: this shard is finished.
        last BATCH if !$banners_cnt;

        $log->out("Saving banner urls for banners from bid $min_bid");
        add_urls_if_needed($banners);

        # Rows are ordered by bid, so the last row carries the next cursor value.
        $min_bid = $banners->[-1]->{bid};

        # A batch smaller than the limit means there is nothing left to fetch.
        last BATCH if $banners_cnt != $BANNERS_LIMIT;
    }
}

$log->out('FINISH');

=head3 _get_banners

    my $banners = _get_banners($shard, $min_bid);

    Fetches one batch (at most $BANNERS_LIMIT rows, ordered by b.bid) of
    banners from the given shard. A banner is selected when it has a non-empty
    href, its bid is greater than $min_bid and its LastChange lies between
    $OVERFLOW_BEGIN_AT and $TABLE_ALTERED_AT.

    Returns whatever get_all_sql returns; the caller treats it as an array ref
    of hash refs with the keys bid, cid, href, sitelinks_set_id, domain, sum,
    LastChange, is_url_active and is_url_archived.

=cut

sub _get_banners {
    my ($shard, $min_bid) = @_;

    # mostly copy-pasted from Antispam::get_changed_banners and Antispam
    # Each pair is [SQL expression => column alias].
    # The continuation lines of the multi-line expression keep their original
    # indentation on purpose, so the generated SQL text stays unchanged.
    my @columns = (
        [ 'b.bid' => 'bid' ],
        [ 'ph.cid' => 'cid' ],
        [ 'b.href' => 'href' ],
        [ 'sitelinks_set_id' => 'sitelinks_set_id' ],
        [ 'domain' => 'domain' ],
        [ 'sum' => 'sum' ],
        [ 'b.LastChange' => 'LastChange' ],
        [ "(
                              (c.statusActive = 'Yes' OR sum > 0 AND DATE_ADD(lastShowTime, INTERVAL 14 DAY)>NOW())
                             AND
                              (b.statusActive = 'Yes' OR ph.statusPostModerate = 'Yes' AND b.statusPostModerate = 'Yes')
                             AND
                              (c.statusShow = 'Yes')
                             AND
                              (b.statusShow = 'Yes')
                            )" => 'is_url_active' ],
        [ "(c.archived = 'Yes' OR b.statusArch = 'Yes')" => 'is_url_archived' ],
    );
    my $select_list = join ',', map { "$_->[0] AS $_->[1]" } @columns;

    my %where = (
        _TEXT => "IFNULL(href,'') != ''",
        'b.bid__gt' => $min_bid,
        'b.LastChange__between' => [$OVERFLOW_BEGIN_AT, $TABLE_ALTERED_AT],
    );

    return get_all_sql(PPC(shard=>$shard), [
        "SELECT STRAIGHT_JOIN $select_list
         FROM
            banners b
            JOIN phrases ph ON ph.pid = b.pid
            JOIN campaigns c ON ph.cid = c.cid",
        WHERE => \%where,
        'ORDER BY b.bid',
        "LIMIT $BANNERS_LIMIT",
    ]);
}


# copy-pasted from protected/ppcUpdateAntispamQueue.pl

=head2 add_urls_if_needed (banners)

    Adds banner links (the main link and the sitelinks) to the antispam queue
    when needed.

    Takes an array ref of banner rows (hash refs with at least bid, href,
    sitelinks_set_id and is_url_active). Distinct URLs for which
    Antispam::need_save reports a true value are passed to Antispam::save_urls;
    every link seen (one per banner and one per sitelink) is passed to
    Antispam::save_url_sources.

=cut

sub add_urls_if_needed {
    my ($banners) = @_;

    # fetch all sitelink sets in a single call
    my @sl_set_ids = uniq grep {$_} map {$_->{sitelinks_set_id}} @$banners;
    my $sl_sets = Sitelinks::get_sitelinks_by_set_id_multi(\@sl_set_ids);

    # collect the urls to check
    my %url_by_hash;    # url hash => { bid, href, status }
    my @url_sources;    # one record per banner / sitelink link

    # Registers one link: the first occurrence of a url hash fixes its bid and
    # href; the status is set when still empty or when the new one is active.
    my $register_url = sub {
        my ($bid, $href, $status, $obj_type, $obj_id) = @_;

        my $entry = $url_by_hash{ url_hash_utf8($href) } ||= {
            bid => $bid,
            href => $href,
        };
        if (!$entry->{status} || $status eq $Antispam::STATUS_ACTIVE) {
            $entry->{status} = $status;
        }
        push @url_sources, {obj_type => $obj_type, obj_id => $obj_id, href => $href};
        return;
    };

    for my $banner (@$banners) {
        my $bid = $banner->{bid};
        my $status = $banner->{is_url_active}
            ? $Antispam::STATUS_ACTIVE
            : $Antispam::STATUS_NON_ACTIVE;

        $register_url->($bid, $banner->{href}, $status, $Antispam::OBJ_BANNER, $bid);

        # check the sitelinks, if the banner has a known sitelinks set
        my $set_id = $banner->{sitelinks_set_id};
        my $sitelinks = $set_id && $sl_sets->{$set_id};
        next unless $sitelinks;

        for my $sitelink (@$sitelinks) {
            $register_url->($bid, $sitelink->{href}, $status, $Antispam::OBJ_SITELINK, $sitelink->{sl_id});
        }
    }

    # find out which of them need to be added
    my @hrefs_to_check = map {
        +{href_hash => $_, status => $url_by_hash{$_}->{status}}
    } keys %url_by_hash;
    my $need_save = Antispam::need_save(\@hrefs_to_check);

    # save into antispam_queue
    my @to_save;
    for my $href_hash (grep { $need_save->{$_} } keys %url_by_hash) {
        my $href_data = $url_by_hash{$href_hash};
        $log->out("Save url: ".to_json($href_data));
        push @to_save, {
            href => $href_data->{href},
            status => $href_data->{status},
            priority => $Antispam::PRIORITY_REGULAR,
            referer => Antispam::create_referer($href_data->{bid}),
        };
    }

    Antispam::save_urls(\@to_save);
    Antispam::save_url_sources(\@url_sources);
}
