#!/usr/bin/env perl

use my_inc "..";

=head1 METADATA

<crontab>
    time: */3 * * * *
    <switchman>
        group: scripts-other
        <leases>
            mem: 60
        </leases>
    </switchman>
    package: scripts-switchman
    sharded: 1
</crontab>

<juggler>
    host:   checks_auto.direct.yandex.ru
    sharded:        1
    ttl:            1h
    tag: direct_group_internal_systems
</juggler>

=cut

=head1 NAME

$Id$

=head1 DESCRIPTION

    Скрипт для отслеживания новых доменов на клиентских аккаунтах и записи их в таблицу client_domains по каждому шарду.
    В дальнейшем, информация по новым доменам используется для отправки уведомлений клиентам (JobSendWarnClientDomains) и для отправки в Баланс/CRM.

    После создания баннера, до прохождения модерации и до отправки в БК, записываем домен со статусом removed=1.
    Как только баннер с доменом уйдёт в БК, меняем на removed=0, и после этого домен *никогда* не сможет стать неактивным (removed=1),
    даже если домен в баннере поменяют, и после создадут другой черновик с таким же доменом.

=cut

use Direct::Modern;
use open ':std' => ':utf8';

use ScriptHelper 'get_file_lock' => [1800], sharded => 1, 'Yandex::Log' => 'messages';

use List::Util qw/maxstr max/;
use Yandex::DBTools;
use Yandex::DBShards;
use Yandex::TimeCommon qw/check_mysql_date/;
use Yandex::MirrorsTools::Hostings qw//;

use Settings;
use Property;
use Tools qw//;
use Primitives qw/get_domain2domain_id/;

# Maximum number of rows requested per campaign type in a single run
# (passed as the LIMIT of each banner query below).
my $BANNER_CNT_LIMIT = 100_000;

$log->out("START");

Tools::stop_the_world_for_dst_transition_hour($log);

# NOTE(review): presumably this is what fills in $SHARD -- confirm in ScriptHelper.
extract_script_params();

# Restore the cursor left by the previous run (last seen LastChange and banner id)
# so that already processed banners are not loaded again.
# Each property stores two ';'-separated values: "<text campaigns>;<mcb campaigns>".
my $prop_last_change = Property->new("PrepareClientDomains_last_change_shard_$SHARD");
my $prop_last_bid    = Property->new("PrepareClientDomains_last_bid_shard_$SHARD");

# A stored date that check_mysql_date() does not accept (or a missing one)
# restarts the scan from the fixed epoch below.
my ($last_change, $last_change_mcb) = split /;/, ($prop_last_change->get() // '');
$last_change     = check_mysql_date($last_change    ) ? $last_change     : '2000-01-01 00:00:00';
$last_change_mcb = check_mysql_date($last_change_mcb) ? $last_change_mcb : '2000-01-01 00:00:00';

# Same idea for the ids: anything that is not a plain non-negative integer
# (missing, empty or garbage) falls back to 0 instead of reaching the SQL bind.
my ($last_bid, $last_bid_mcb) = split /;/, ($prop_last_bid->get() // '');
$last_bid     = 0 unless defined $last_bid     && $last_bid     =~ /\A[0-9]+\z/;
$last_bid_mcb = 0 unless defined $last_bid_mcb && $last_bid_mcb =~ /\A[0-9]+\z/;

$log->out("Starting with last_change: $last_change, last_bid: $last_bid / last_change_mcb: $last_change_mcb, last_bid_mcb: $last_bid_mcb");

# Loads banners of one campaign type that changed after the saved cursor.
#
# Arguments:
#   $type      - 'text' or 'mcb'; selects the joined tables and the id column
#   $condition - hashref with the cursor: last_change (datetime string) and last_bid
#   %options   - shard => value handed to PPC(), limit => value bound to LIMIT
#
# Returns the result of get_all_sql() for the query; every selected row has the
# columns ClientID, bid, domain, removed, camp_type and LastChange.
# Dies when $type is neither 'text' nor 'mcb'.
sub get_banners_for_processing {
    my ($type, $condition, %options) = @_;

    # Per-type query parts: the id column of the banner table and the joins
    # leading from campaigns to that table.
    my %query_parts_for = (
        text => {
            id_column => 'bid',
            joins     => [
                'JOIN phrases p using(cid)',
                'JOIN banners b FORCE INDEX(LastChange) using(pid)',
            ],
        },
        mcb => {
            id_column => 'mbid',
            joins     => [
                'JOIN media_groups using(cid)',
                'JOIN media_banners b using(mgid)',
            ],
        },
    );

    my $parts = $query_parts_for{$type}
        or die "get_banners_for_processing got unknown type: $type";

    my $id_column = $parts->{id_column};
    my $joins_sql = join "\n", @{ $parts->{joins} };

    # The WHERE clause picks rows strictly newer than the cursor date (but at
    # least 10 minutes old), plus rows with exactly the cursor date and a
    # greater id, so a run cut off by LIMIT resumes where it stopped.
    my $sql = sprintf <<EOS, $id_column, $type, $joins_sql, $type, $id_column, $id_column;
        SELECT
            u.ClientID, b.%s bid,
            b.domain,
            (b.BannerID = 0 or b.statusModerate != 'Yes' or c.sum <= c.sum_spent) removed,
            '%s' camp_type,
            DATE_FORMAT(b.LastChange,'%%Y-%%m-%%d %%H:%%i:%%s') LastChange
        FROM
            campaigns c
            %s
            JOIN users u on u.uid = c.uid
        WHERE
            c.archived = 'No'
            AND c.statusEmpty = 'No'
            AND c.type = '%s'
            AND (
                   -- Оставим зазор на время транзакции: 10 минут
                   (b.LastChange > ? AND DATE_ADD(b.LastChange, INTERVAL 10 MINUTE) <= NOW())
                OR (b.LastChange = ? AND b.%s > ?)
            )
            AND b.href IS NOT NULL
            AND b.domain IS NOT NULL
        ORDER BY b.LastChange, b.%s
        LIMIT ?
EOS

    # Binds, in placeholder order: last_change twice, last_bid, then the limit.
    return get_all_sql(
        PPC(shard => $options{shard}),
        $sql,
        @$condition{qw/last_change last_change last_bid/},
        $options{limit},
    );
}

# Load the banners changed since the saved cursor, separately for text and MCB
# campaigns (each type keeps its own cursor).
my %fetch_options = (limit => $BANNER_CNT_LIMIT, shard => $SHARD);

my $banners_text = get_banners_for_processing(
    'text',
    { last_change => $last_change, last_bid => $last_bid },
    %fetch_options,
);
my $banners_mcb = get_banners_for_processing(
    'mcb',
    { last_change => $last_change_mcb, last_bid => $last_bid_mcb },
    %fetch_options,
);

# New cursor date: the newest LastChange seen, never older than the previous cursor.
my $max_last_change     = maxstr($last_change,     map { $_->{LastChange} } @$banners_text);
my $max_last_change_mcb = maxstr($last_change_mcb, map { $_->{LastChange} } @$banners_mcb);

# New cursor id: the greatest id among banners carrying exactly the newest date.
# max() yields undef for an empty list, in which case the previous id is kept.
my $max_bid_at = sub {
    my ($banners, $stamp) = @_;
    my @bids = map { $_->{bid} } grep { $_->{LastChange} eq $stamp } @$banners;
    return max(@bids);
};
$last_bid     = $max_bid_at->($banners_text, $max_last_change)     || $last_bid;
$last_bid_mcb = $max_bid_at->($banners_mcb,  $max_last_change_mcb) || $last_bid_mcb;

# Group banners of text and MCB campaigns by "ClientID:camp_type" and domain.
# The first banner seen for a domain represents it; its removed flag stays set
# only while every banner seen for that domain is removed as well.
my %data_grouped;
my $domain2stripped = {};
my %domains_to_resolve;
for my $banner (@$banners_text, @$banners_mcb) {
    my ($client_id, $camp_type, $domain) = @{$banner}{qw/ClientID camp_type domain/};

    my $by_domain = $data_grouped{"$client_id:$camp_type"} //= {};
    my $kept = $by_domain->{$domain} ||= $banner;
    $kept->{removed} = $banner->{removed} if $kept->{removed};

    $domains_to_resolve{$domain} = 1;

    # For text campaigns the stripped form of the domain is resolved as well.
    if ($camp_type eq 'text') {
        my $stripped = Yandex::MirrorsTools::Hostings::strip_domain($domain);
        $domain2stripped->{$domain} = $stripped;
        $domains_to_resolve{$stripped} = 1;
    }
}
my $total = @$banners_text + @$banners_mcb;

# domain name => domain_id for every domain collected above
my $domain2domain_id = get_domain2domain_id([keys %domains_to_resolve]);

$log->out("Found: $total banners");

# Build the rows to be saved in client_domains / client_domains_stripped.
# Row layouts (the insert code further down relies on them):
#   @records          : [record_id or undef for a new row, ClientID, camp_type, domain_id, removed]
#   @records_stripped : [undef (record_id is filled in later), ClientID, stripped domain_id]
my (@records, @records_stripped);
for my $gkey (keys %data_grouped) {
    my ($ClientID, $camp_type) = split /:/, $gkey;
    my $banner_by_domain = $data_grouped{$gkey};

    # Look up only the ids that were actually resolved; undefined ids have no
    # place in the condition, and with nothing resolved the query is pointless.
    my @known_domain_ids = grep { $_ } map { $domain2domain_id->{$_} } keys %$banner_by_domain;

    my $existent_domains_by_ukey = @known_domain_ids
        ? get_hashes_hash_sql(PPC(shard => $SHARD), [
            q{SELECT CONCAT_WS(':', ClientID, camp_type, domain_id), record_id, domain_id, removed FROM client_domains},
            WHERE => {
                ClientID => $ClientID,
                camp_type => $camp_type,
                domain_id => \@known_domain_ids,
            },
        ])
        : {};

    for my $banner (values %$banner_by_domain) {
        my $domain = $banner->{domain};
        my $domain_id = $domain2domain_id->{$domain} // 0;

        if (!$domain_id) {
            $log->warn({"Cannot get domain_id for banner:" => $banner});
            next;
        }

        my $ukey = join ':', $ClientID, $camp_type, $domain_id;
        my ($record_id, $removed);

        if (exists $existent_domains_by_ukey->{$ukey}) {
            my $existent = $existent_domains_by_ukey->{$ukey};
            $record_id = $existent->{record_id};
            # removed can only go from 1 to 0: it stays 1 only while both the
            # stored row and the current banner say so.
            $removed = $existent->{removed} && $banner->{removed} ? 1 : 0;
            # Nothing changed -- no need to rewrite the row.
            next if $removed == $existent->{removed};
        } else {
            $removed = $banner->{removed} ? 1 : 0;
        }

        $log->out({record_id => $record_id // 'new', ClientID => $ClientID, camp_type => $camp_type, domain => $domain, domain_id => $domain_id, removed => $removed});

        push @records, [$record_id, $ClientID, $camp_type, $domain_id, $removed];

        # A stripped-domain row accompanies only brand-new text-campaign records.
        if ($banner->{camp_type} eq 'text' && !$record_id) {
            my $stripped_domain_id = $domain2domain_id->{ $domain2stripped->{$domain} // '' };
            if ($stripped_domain_id) {
                push @records_stripped, [undef, $ClientID, $stripped_domain_id];
            } else {
                # Never queue a row with an unresolved domain_id.
                $log->warn({"Cannot get stripped domain_id for banner:" => $banner});
            }
        }
    }
}

if (@records) {
    # Rows without a record_id are new and get a freshly generated one;
    # every row, new or updated, gets a new sync_id (stored as element 5).
    my @new_rows = grep { !defined $_->[0] } @records;
    my $record_ids = get_new_id_multi('client_domains_record_id', scalar(@new_rows));
    my $sync_ids   = get_new_id_multi('client_domains_sync_id', scalar(@records));

    for my $row (@new_rows) {
        $row->[0] = shift @$record_ids;
    }
    for my $row (@records) {
        $row->[5] = shift @$sync_ids;
    }

    # NOTE(review): logtime is not in the column list, so VALUES(logtime) presumably
    # resolves to the column default -- confirm against the table definition.
    do_mass_insert_sql(PPC(shard => $SHARD), q{
        INSERT INTO client_domains (record_id, ClientID, camp_type, domain_id, removed, sync_id) VALUES %s
        ON DUPLICATE KEY UPDATE
            removed = VALUES(removed),
            logtime = VALUES(logtime),
            sync_id = VALUES(sync_id)
    }, \@records);
}

if (@records_stripped) {
    # Every stripped row is new: generate one record_id per row and put it in
    # the first element, which was left undefined when the row was built.
    my $stripped_ids = get_new_id_multi('client_domains_stripped_record_id', scalar(@records_stripped));
    for my $row (@records_stripped) {
        $row->[0] = shift @$stripped_ids;
    }

    do_mass_insert_sql(PPC(shard => $SHARD), q{
        INSERT IGNORE INTO client_domains_stripped (record_id, ClientID, domain_id) VALUES %s
    }, \@records_stripped);
}

# Persist the new cursor in the same "<text>;<mcb>" layout it was read in,
# so the next run continues from here.
my $saved_last_change = join ';', $max_last_change, $max_last_change_mcb;
my $saved_last_bid    = join ';', $last_bid, $last_bid_mcb;
$prop_last_change->set($saved_last_change);
$prop_last_bid->set($saved_last_bid);

$log->out(sprintf(
    'Updating last_change: %s last_bid: %s / last_change_mcb: %s last_bid_mcb: %s',
    $max_last_change, $last_bid, $max_last_change_mcb, $last_bid_mcb,
));

juggler_ok();

$log->out('FINISH');
