#
#   RedirectCheckQueue
#     A module for scheduling redirect-domain checks for banners
#   
#   $Id$
#
####################################################################

=head1 NAME

RedirectCheckQueue

=head1 DESCRIPTION

Interface for scheduling redirect domain check

=cut

package RedirectCheckQueue;

use warnings;
use strict;

use List::Util qw/sum/;

use Yandex::DBShards;
use Yandex::DBTools;
use Yandex::IDN;
use Yandex::ListUtils;
use Yandex::URL qw/get_host strip_protocol/;

use Settings;

use URLDomain;

#
#   add banners to the delayed redirect check queue
#
#   push_banners($banners);
#
#   $banners = [
#       {
#           cid => 123,
#           bid => 234,     # only bid is read by push_banners
#           ...
#       },
#       ...
#   ];
#

sub push_banners {
    my ($banners) = @_;

    # Queue the given banners for a delayed redirect check.
    #   $banners - arrayref of hashes; only the bid field is read.
    # Always returns 1.

    my @banner_ids = map { $_->{bid} } @$banners;

    # The bids are split per shard, at most 1000 ids per chunk.
    for my $chunk (sharded_chunks(bid => \@banner_ids, 1_000)) {
        my @rows;
        push @rows, [$_, 'banner'] for @{ $chunk->{bid} };

        # Nothing to write for an empty chunk.
        next unless @rows;

        do_mass_insert_sql(PPC(shard => $chunk->{shard}), "
            replace into redirect_check_queue
                   (object_id, object_type)
                   values %s", \@rows, {});
    }

    return 1;
}

#
#   remove dictionary entries: those matching the given hrefs, or,
#   when no hrefs are given, all entries older than 2 days
#
sub clear_dict {
    my ($hrefs) = @_;

    # Delete rows from redirect_check_dict.
    #   $hrefs - optional arrayref of hrefs. When it is defined and non-empty,
    #            the rows with exactly these hrefs are removed; otherwise the
    #            rows whose logdate is more than 2 days old are removed.

    my $ids = (defined $hrefs && @$hrefs)
        ? get_one_column_sql(MONITOR, ["SELECT id FROM redirect_check_dict", WHERE => {href => $hrefs}])
        : get_one_column_sql(MONITOR, "SELECT id FROM redirect_check_dict WHERE logdate < NOW() - interval 2 day");

    # Delete by id in numerically sorted batches of 1000.
    foreach my $id_batch (chunks([nsort(@$ids)], 1000)) {
        do_delete_from_table(MONITOR, "redirect_check_dict", where => {id => $id_batch});
    }
}

#
#   fill the Dictionary with new redirect domain check results
#   ( [ $href, $redirect, $domain_redir, $redirect_result_href ], ... )
#   $redirect_result_href is optional and is stored as '' when undefined
#
sub feed_dict {
    # Store redirect check results in redirect_check_dict.
    # Each argument is an arrayref:
    #   [ $href, $redirect, $domain_redir, $redirect_result_href ]
    # The href is normalized before it is stored, the domain column is taken
    # from get_host($redirect), and an undefined $redirect_result_href is
    # stored as ''. A row with an existing key is updated and its logdate
    # is reset to NOW().
    my @rows;
    foreach my $entry (@_) {
        push @rows, [
            _dict_href_normalize($entry->[0]),
            $entry->[1],
            $entry->[2],
            get_host($entry->[1]),
            $entry->[3] // '',
        ];
    }

    return do_mass_insert_sql(MONITOR, "
        INSERT into redirect_check_dict
               (href, redirect, domain_redir, domain, redirect_result_href)
               values %s
        on duplicate key update
               redirect = values(redirect),
               domain = values(domain),
               domain_redir = values(domain_redir),
               redirect_result_href = values(redirect_result_href),
               logdate=NOW()
            ", \@rows);
}

#
#   get redirect domain result from Dictionary
#
sub check_dict {
    my $href = shift;

    # Look up a cached check result for $href.
    # The lookup key is the normalized href; only rows whose logdate is less
    # than 2 days old are considered. The selected columns are
    # (redirect, domain_redir, redirect_result_href); the result of
    # get_one_line_array_sql is returned as is.
    my $lookup_key = _dict_href_normalize($href);

    return get_one_line_array_sql(MONITOR, "
        select redirect, domain_redir, redirect_result_href 
          from redirect_check_dict
         where href = ?
           and adddate(logdate, INTERVAL 2 DAY) > NOW() 
            ", $lookup_key);
}

# strip insignificant query parameters from the url (so the cache works better):
# every parameter whose name starts with utm_, openstat, match_type or matched_keyword
# requests are made with the full parameters, but urls are "normalized" for the cache
sub _dict_href_normalize {
    my ($href) = @_;

    # Build the cache key for an href: query parameters whose names start
    # with utm_, openstat, match_type or matched_keyword are dropped, and
    # the remaining parameters are re-joined with '&' (a ';' separator is
    # therefore rewritten to '&'). An href without '?' is returned unchanged.

    # Explicit undef on purpose: the result is used as one element of a
    # list, so it must stay a single value even in list context.
    return undef unless defined $href;

    my ($base, $query) = $href =~ /^(.*?)\?(.*)/
        or return $href;

    my $kept = join '&',
        grep {!/^(utm_|openstat|match_type|matched_keyword)/}
        split /[&;]/, $query;

    # Drop the '?' as well when no parameter survived the filter.
    return $kept eq '' ? $base : $base . "?$kept";
}

#
#   Are there any good urls on this domain ?
#
sub check_domain {
    my $domain = shift;

    # Fetch (redirect, domain_redir) from one dictionary row for $domain
    # whose logdate is less than 2 days old. Which row is picked is not
    # specified (limit 1 without an order).
    my $query = "
        select redirect, domain_redir
          from redirect_check_dict
         where domain = ?
           and logdate > NOW() - INTERVAL 2 DAY
         limit 1  
            ";

    # Kept in an array so that a scalar-context caller gets the element count.
    my @row = get_one_line_array_sql(MONITOR, $query, $domain);

    return @row;
}

=head2 domain_need_check_redirect

    Determines whether a banner's link has to be sent for redirect checking.
    Input: the banner being checked and the previous version of that banner.
           For both banners a hash with the href and domain fields is enough.
    Output: the domain for the banner being checked and a flag telling whether it has to be sent for checking.

=cut
sub domain_need_check_redirect {
    my ($banner, $old_banner) = @_;

    # Returns ($domain, $need_check) for a banner.
    #   $banner     - hash with href
    #   $old_banner - optional previous version (hash with href and domain)
    # Hrefs are compared with the protocol stripped.

    $old_banner ||= {};

    my $new_href = strip_protocol($banner->{href});
    my $old_href = strip_protocol($old_banner->{href} || '');

    # Href unchanged: keep the previous domain, no check needed.
    return ($old_banner->{domain}, 0) if $old_href eq $new_href;

    # Href changed: take the domain from the new href (in unicode form) and
    # request a check only when the href is a known redirect.
    my $domain = Yandex::IDN::idn_to_unicode(get_host($new_href));
    my $need_check = is_known_redirect($new_href) ? 1 : 0;

    return ($domain, $need_check);
}

=head2 calc_monitor_values

    Computes the values to be sent to monitoring

=cut
sub calc_monitor_values {
    my ($shard) = @_;

    # Build the monitoring counters for the redirect check queue of one shard.
    # Returns a hashref: metric name => value.

    # One row per banner domain: the age in seconds of its oldest queue
    # entry (max_age) and the number of its queue entries (size).
    my $domain_rows = get_all_sql(PPC(shard => $shard),
        "
        select domain
                , IFNULL(TIMESTAMPDIFF(SECOND, min(logtime), NOW()), 0) as max_age
                , count(*) as size
          from redirect_check_queue rcq
                left join banners b on b.bid = rcq.object_id and rcq.object_type='banner'
          where logtime <= now()
      group by domain
      order by size desc
          ");

    # Number of distinct users that own queued banners.
    my $users_cnt = get_one_field_sql(PPC(shard => $shard), "select count(distinct c.uid)
          from redirect_check_queue rcq
                left join banners b on b.bid = rcq.object_id and rcq.object_type='banner'
                left join phrases p on p.pid = b.pid
                left join campaigns c on p.cid = c.cid
          where logtime <= now()");

    my $domains_cnt = scalar(@$domain_rows);

    # Both series are ordered from the largest value to the smallest.
    my @sizes_desc = sort { $b <=> $a } map { $_->{size} } @$domain_rows;
    my @ages_desc  = sort { $b <=> $a } map { $_->{max_age} } @$domain_rows;

    # Fractile statistics: for fractile N the top (100 - N)% of the domains
    # are skipped in each series.
    my %fractile;
    for my $percent (100, 70) {
        my $skip = int($domains_cnt * (100 - $percent) / 100);

        my $age   = $ages_desc[$skip];
        my $total = sum @sizes_desc[$skip .. $#sizes_desc];

        $fractile{$percent} = { max_age => $age, count => $total };
    }

    # Values are undefined for an empty queue, hence the "|| 0".
    return {
        "direct.redirect.shard_$shard.queue_cnt_domains" => $domains_cnt,
        "direct.redirect.shard_$shard.max_age"           => $fractile{100}{max_age} || 0,
        "direct.redirect.shard_$shard.queue_size"        => $fractile{100}{count} || 0,
        "direct.redirect.shard_$shard.max_age_70"        => $fractile{70}{max_age} || 0,
        "direct.redirect.shard_$shard.queue_size_70"     => $fractile{70}{count} || 0,
        "direct.redirect.shard_$shard.users_count"       => $users_cnt,
    };
}

1;
