package AggregatorDomains;

# $Id: AggregatorDomains.pm 162882 2018-08-01 13:37:24Z a-lobanova $

=head1 NAME
    
    AggregatorDomains

=head1 DESCRIPTION

    Функции для расклейки доменов-агрегаторов

=cut

use strict;
use warnings;
use utf8; 

use List::MoreUtils qw/uniq part/;

use Yandex::I18n;
use Yandex::HashUtils;
use Yandex::URL;
use Yandex::DBShards qw/sharded_chunks/;

use Settings;

use Yandex::DBTools;
use PrimitivesIds qw//;
use Client qw//;
use Yandex::IDN qw/is_valid_domain/;


use base qw/Exporter/;
our @EXPORT = qw//;

# Minimum length (in characters) that a cleaned-up subdomain must have
# for extract_aggregator_domain_from_url to return a pseudo-domain.
my $MIN_SUBDOMAIN_LENGTH = 4;

# Dispatch table: aggregator domain => sub that derives the raw subdomain
# from the path part of a URL pointing to that aggregator.
my $SUBDOMAIN_GETTER = {
    'vk.com' => \&_get_subdomain_generic,
    'instagram.com' => \&_get_subdomain_generic,
    'ok.ru' => \&_get_subdomain_for_ok_ru,
    'youtube.com' => \&_get_subdomain_for_youtube_com,
    'sites.google.com' => \&_get_subdomain_for_sites_google_com,
};

# Aggregator domain => name of the client feature that turns on
# pseudo-domain extraction for that aggregator (see _get_enabled_domains).
our $FEATURE_NAME_BY_DOMAIN = {
    'vk.com' => 'aggregator_domains_vk_com',
    'instagram.com' => 'aggregator_domains_instagram_com',
    'ok.ru' => 'aggregator_domains_ok_ru',
    'youtube.com' => 'aggregator_domains_youtube_com',
    'sites.google.com' => 'aggregator_domains_sites_google_com',
};

# Every aggregator domain that has a subdomain getter, in string-sorted order.
our @ALLOWED_DOMAINS = sort keys %$SUBDOMAIN_GETTER;


=head2 extract_aggregator_domain_from_url

  Splits an aggregator domain into a per-page pseudo-domain, based on the href.
  This is needed for clustering in BK, so that ads of two different advertisers
  can be shown at the same time when both of them are hosted on the same aggregator.

  Parameters:
    $href            - link URL, with or without the protocol
    $enabled_domains - hashref { aggregator domain => true value } with the
                       aggregator domains for which extraction is switched on

  Returns the string "<subdomain>.<aggregator domain>", or undef when:
    - the href is empty or has no host part;
    - the host does not belong to a known aggregator, or that aggregator
      is not present in $enabled_domains;
    - the cleaned-up subdomain is shorter than $MIN_SUBDOMAIN_LENGTH;
    - the resulting name does not pass is_valid_domain.

=cut

sub extract_aggregator_domain_from_url {
    my ($href, $enabled_domains) = @_;
    return undef unless $href;

    $href = lc($href);
    $href = strip_protocol($href); # strip "http://"
    $href =~ s/[?#].*$//; # strip parameters

    my ($orig_domain, $path) = split('/', $href, 2);

    # split() yields an empty list for an empty string, so hrefs like "#anchor",
    # "?a=b" or a bare protocol leave the host undefined; a leading "/" leaves it empty.
    # Neither can match an aggregator, so stop before handing it to the domain helpers.
    return undef unless defined $orig_domain && length $orig_domain;

    my $all_level_domains = get_domain_all_level_domains($orig_domain);

    # the first candidate that has a registered subdomain getter wins
    my ($domain) = grep { $SUBDOMAIN_GETTER->{$_} } @$all_level_domains;
    return undef unless $domain && $enabled_domains->{ $domain };

    my $subdomain = $SUBDOMAIN_GETTER->{ $domain }->($path);
    $subdomain = _clear_subdomain($subdomain);

    if ((length($subdomain) >= $MIN_SUBDOMAIN_LENGTH)
        && is_valid_domain("$subdomain.$domain")) {
        return "$subdomain.$domain";
    } else {
        return undef;
    }
}

# Normalizes a raw subdomain so that it can be used as a domain label:
# every run of characters outside $Settings::ALLOWED_ALPHABET_LETTERS, digits and '-'
# becomes a single '-', repeated dashes are collapsed, and a leading/trailing dash is dropped.
# A false value (undef, '' or '0') is returned unchanged.
sub _clear_subdomain {
    my ($subdomain) = @_;
    return $subdomain unless $subdomain;

    # character class matching everything that is not allowed inside a domain label
    my $forbidden_chars = sprintf("[^%s0-9-]+", $Settings::ALLOWED_ALPHABET_LETTERS);

    for ($subdomain) {
        s/$forbidden_chars/-/ig;
        s/-+/-/g;
        s/(^-|-$)//g;
    }
    return $subdomain;
}

# Default subdomain getter: takes the first path segment (everything before the first "/").
# Returns '' when the path is false (undef, '' or '0') or begins with "/".
sub _get_subdomain_generic {
    my ($path) = @_;
    return '' unless $path;

    my ($first_segment) = $path =~ /^([^\/]+)/;
    return $first_segment // '';
}

# Subdomain getter for ok.ru paths:
#   "profile/<digits>"                  => "profile-<digits>"
#   "group/<digits>" or "group<digits>" => "group-<digits>"
#   anything else                       => the first path segment
# Returns '' when the path is false or begins with "/".
sub _get_subdomain_for_ok_ru {
    my ($path) = @_;
    return '' unless $path;

    return "$1-$2" if $path =~ /^(profile)\/(\d+)/;
    return "$1-$2" if $path =~ /^(group)\/?(\d+)/;
    return $1      if $path =~ /^([^\/]+)/;

    return '';
}

# Subdomain getter for youtube.com: only "channel/<id>" paths are recognized
# and give "channel-<id>"; every other path gives ''.
sub _get_subdomain_for_youtube_com {
    my ($path) = @_;
    return '' unless $path;

    my ($prefix, $channel_id) = $path =~ /^(channel)\/([^\/]+)/;
    return defined $channel_id ? "$prefix-$channel_id" : '';
}

# Subdomain getter for sites.google.com: "site/<name>" and "view/<name>" paths
# give "<name>-site" / "<name>-view"; every other path gives ''.
sub _get_subdomain_for_sites_google_com {
    my ($path) = @_;
    return '' unless $path;

    return $path =~ /^(site|view)\/([^\/]+)/ ? "$2-$1" : '';
}

=head2 update_aggregator_domains

  Recalculates the clustering pseudo-domains for banners and saves the changes to the DB.

  Parameters:
    $hrefs_by_bids - hashref { bid => href }

  A banner gets a pseudo-domain only when its client is known and has at least one
  aggregator domain enabled; otherwise its pseudo-domain is considered absent.

  Returns an arrayref with the bids whose stored pseudo-domain differs from the
  newly calculated one (only those bids are written to the DB).

=cut

sub update_aggregator_domains
{
    my ($hrefs_by_bids) = @_;

    my @bids = grep { $_ } keys %{ $hrefs_by_bids // {} };
    return [] unless @bids;

    my $client_id_by_bid = PrimitivesIds::get_key2clientid(bid => \@bids);

    my @client_ids = uniq grep { $_ } values %$client_id_by_bid;
    my $enabled_domains_by_client_id = _get_enabled_domains(\@client_ids);

    # newly calculated pseudo-domain (or undef) for every bid
    my %new_domain_by_bid;
    foreach my $bid (@bids) {
        my $client_id = $client_id_by_bid->{$bid};
        my $enabled_domains = $client_id ? $enabled_domains_by_client_id->{$client_id} : undef;

        $new_domain_by_bid{$bid} = ($enabled_domains && %$enabled_domains)
            ? extract_aggregator_domain_from_url($hrefs_by_bids->{$bid}, $enabled_domains)
            : undef;
    }

    my $stored_domain_by_bid = get_aggregator_domains_by_bids(\@bids);

    # a missing value and an empty value are treated as equal
    my @changed_rows_bids = grep {
        ($stored_domain_by_bid->{$_} // '') ne ($new_domain_by_bid{$_} // '')
    } @bids;

    my %data_for_update;
    @data_for_update{@changed_rows_bids} = @new_domain_by_bid{@changed_rows_bids};

    _do_update_aggregator_domains(\%data_for_update);

    return \@changed_rows_bids;
}

# For every client id builds the set of aggregator domains whose feature
# (see $FEATURE_NAME_BY_DOMAIN) is among the features allowed for that client.
# Takes an arrayref of client ids.
# Returns a hashref { client_id => { domain => 1, ... } }; {} for an empty or missing list.
sub _get_enabled_domains {
    my ($client_ids) = @_;
    return {} unless $client_ids && @$client_ids;

    my $features_by_client_id = Client::ClientFeatures::_get_features_allowed_for_client_ids($client_ids);

    my %enabled_domains_by_client_id;
    for my $client_id (@$client_ids) {
        # lookup set of the feature names allowed for this client
        my %has_feature = map { $_ => 1 } @{ $features_by_client_id->{$client_id} // [] };

        my %enabled_domains;
        for my $domain (@ALLOWED_DOMAINS) {
            $enabled_domains{$domain} = 1 if $has_feature{ $FEATURE_NAME_BY_DOMAIN->{$domain} };
        }
        $enabled_domains_by_client_id{$client_id} = \%enabled_domains;
    }
    return \%enabled_domains_by_client_id;
}

# Writes the { bid => pseudo_domain } changes to the DB: the bids are grouped
# into per-shard chunks and each chunk is handed to the per-shard writer.
# Does nothing for an empty hash.
sub _do_update_aggregator_domains {
    my ($aggregator_domains_by_bids) = @_;

    my @bids = keys %$aggregator_domains_by_bids;
    return unless @bids;

    foreach my $chunk (sharded_chunks(bid => \@bids)) {
        # only the part of the input that belongs to the chunk's bids
        my $chunk_domains_by_bids = hash_cut($aggregator_domains_by_bids, @{ $chunk->{bid} });
        _do_update_aggregator_domains_in_shard($chunk->{shard}, $chunk_domains_by_bids);
    }
}

# Applies the { bid => pseudo_domain } changes inside a single shard:
# bids with a true pseudo-domain are inserted (or updated on duplicate key),
# bids with a false pseudo-domain are deleted from aggregator_domains.
# Does nothing when the shard is false or there are no bids.
sub _do_update_aggregator_domains_in_shard {
    my ($shard, $domains_by_bids) = @_;

    my @bids = keys %$domains_by_bids;
    return unless $shard && @bids;

    my @bids_to_upsert = grep {  $domains_by_bids->{$_} } @bids;
    my @bids_to_delete = grep { !$domains_by_bids->{$_} } @bids;

    if (@bids_to_upsert) {
        my @rows = map { [$_, $domains_by_bids->{$_}] } @bids_to_upsert;

        do_mass_insert_sql(PPC(shard => $shard), "
            insert into aggregator_domains (bid, pseudo_domain) values %s
            on duplicate key update pseudo_domain = values(pseudo_domain)
        ", \@rows);
    }

    if (@bids_to_delete) {
        do_delete_from_table(PPC(shard => $shard), "aggregator_domains", where => {bid => \@bids_to_delete});
    }
}

=head2 get_aggregator_domains_by_bids

  Fetches the stored clustering pseudo-domains for the given banner ids.

  Parameters:
    $bids - arrayref of banner ids
    named options:
      shard - shard to read from; when it is not defined, 'all' is used

  Returns the result of get_aggregator_domains_by_bids_without_sharding
  for the selected DB instance.

=cut

sub get_aggregator_domains_by_bids {
    my ($bids, %opts) = @_;

    my $dbh = PPC(shard => $opts{shard} // 'all');
    return get_aggregator_domains_by_bids_without_sharding($dbh, $bids);
}

=head2 get_aggregator_domains_by_bids_without_sharding

  Fetches the stored clustering pseudo-domains from the given DB instance.

  Parameters:
    $dbh  - DB instance to query
    $bids - arrayref of banner ids

  Returns the result of get_hash_sql for the (bid, pseudo_domain) rows of
  aggregator_domains that match the given bids; {} when $dbh is false
  or $bids is missing or empty.

=cut

sub get_aggregator_domains_by_bids_without_sharding {
    my ($dbh, $bids) = @_;
    return {} unless $dbh && $bids && @$bids;

    my $query = ['SELECT bid, pseudo_domain FROM aggregator_domains', WHERE => { bid => $bids }];
    my $pseudo_domain_by_bid = get_hash_sql($dbh, $query);

    return $pseudo_domain_by_bid;
}

1;
