package BM::Banners::LBannerBase;
use strict;

use utf8;
use open ':utf8';

use std;
use base qw(ObjLib::ProjPart);

use BM::Phrase;
use BM::Pages::LandingPage;
use Utils::Words;
use Utils::Urls;
use Utils::Dicts qw(load_words_dict);
use Utils::Sys;
use Utils::Funcs qw(decode_quotes);

use List::Util qw(min sum);


########################################################
# Интерфейс
########################################################

#   id                                            id баннера
#   campaign_id                                   номер кампании - может потребоваться для получения настроек для кампании
#   title                                         текст заголовка
#   title_phr                                     возвращает фразу, а не текст
#   body                                          текст тела баннера
#   body_phr                                      возвращает фразу, а не текст

#   phl                                           phrase_list родных фраз

#   domain                                        домен
#   domain_2lvl                                   домен второго уровня, получаемый из урла

#   get_minicategs                                получить список категорий баннера

#   get_search_threshold_filtered($phl, $thld)    фильтрация частотами с учётом региона баннера

# Declare accessor methods for the plain banner fields listed below.
# mk_accessors is not defined in this file — presumably inherited from
# ObjLib::ProjPart (confirm).
__PACKAGE__->mk_accessors(qw(
    id
    title body
    url
    title_extension
    external_minicategs
));

########################################################
# Инициализация
########################################################

# Fill in defaults for optional banner fields.
#
#   region          falls back to the 'targetting' field, then to ''
#   max_bid         defaults to ''
#   picture_url     defaults to ''
#   page            built from the banner url via proj->page unless already set
#   sitelinks       defaults to an empty array ref
#   minuskeywords   defaults to an empty array ref
#   title_extension forced to '' when undefined
sub init {
    my $self = shift;
    my $proj = $self->proj;

    $self->{region}      ||= $self->{targetting} || '';
    $self->{max_bid}     ||= '';
    $self->{picture_url} ||= '';
    $self->{page}        ||= $proj->page($self->{url});

    for my $list_field (qw(sitelinks minuskeywords)) {
        $self->{$list_field} ||= [];
    }

    # Temporary warning suppression: keep title_extension defined.
    $self->title_extension('') unless defined $self->title_extension;
}

# Build a phrase list from the raw {phrases} string.
#
# The string is split on commas (followed by optional whitespace). Every item
# has the form "text:inf1:inf2...": the text goes through decode_quotes and is
# handed to proj->phrase together with an array ref of the remaining fields.
# The resulting phrases are wrapped with language->phrase_list.
sub _parse_phl {
    my ($self) = @_;

    my @phrs = map {
        my ($text, @inf) = split /:/, $_;
        $text = decode_quotes($text);
        my $phr = $self->proj->phrase($text, \@inf);
        $phr;
    } split /,\s*/, $self->{phrases};

    return $self->language->phrase_list(\@phrs);
}

# Return the texts of the banner's own phrases as a plain list of strings.
#
# Parses the raw {phrases} string the same way as _parse_phl (comma-separated
# items, text is everything before the first ':'), but only applies
# decode_quotes and does not create phrase objects.
sub phrases_texts :CACHE {
    my ($self) = @_;

    my @texts = map {
        # A list slice (not a list assignment) keeps split's default handling
        # of trailing empty fields.
        my $text = (split /:/, $_)[0];
        $text = decode_quotes($text);
        $text;
    } split /,\s*/, $self->{phrases};

    return @texts;
}


########################################################
#Методы
########################################################

# Delegate to proj->bf->lbanner, forwarding all arguments after the invocant.
sub banner {
    my ($self, @args) = @_;
    my $factory = $self->proj->bf;
    return $factory->lbanner(@args);
}

# Delegate to proj->bf->banner_list, forwarding all arguments after the invocant.
sub banner_list {
    my ($self, @args) = @_;
    my $factory = $self->proj->bf;
    return $factory->banner_list(@args);
}

# title/body/phl objects are not created during initialization, so that the
# language is not determined when it is not needed.
#
# Returns the banner title as a phrase (built via language->phrase), not text.
sub title_phr :CACHE {
    my ($self) = @_;
    my $language = $self->language;
    return $language->phrase($self->title);
}
# Returns the banner body as a phrase (built via language->phrase), not text.
sub body_phr :CACHE {
    my ($self) = @_;
    my $language = $self->language;
    return $language->phrase($self->body);
}
# Phrase list of the banner's own phrases, parsed from the raw {phrases} field.
sub phl :CACHE {
    my ($self) = @_;
    # The raw {phrases} data is intentionally kept after parsing: the local
    # cache may be cleared, after which ->phl can be called again.
    return $self->_parse_phl;
}

# Simple getters: each reads one field of the banner hash or delegates to phl.

# Image hash, or "" when the field is missing or false.
sub image_hash {
    my ($self) = @_;
    return $self->{image_hash} || "";
}

# Same object as phl.
sub phrase_list {
    my ($self) = @_;
    return $self->phl;
}

# Whatever phl->phrases returns.
sub phrases {
    my ($self) = @_;
    return $self->phl->phrases;
}

sub region {
    my ($self) = @_;
    return $self->{region};
}

sub max_bid {
    my ($self) = @_;
    return $self->{max_bid};
}

sub picture_url {
    my ($self) = @_;
    return $self->{picture_url};
}

sub minuskeywords {
    my ($self) = @_;
    return $self->{minuskeywords};
}

sub campaign_minuskeywords {
    my ($self) = @_;
    return $self->{campaign_minuskeywords};
}

sub sitelinks {
    my ($self) = @_;
    return $self->{sitelinks};
}

# Page object stored under the 'page' key (filled in by init).
sub page {
    my ($self) = @_;
    return $self->{'page'};
}

# Domain of the banner's page, or '' when the banner has no page.
sub domain :CACHE {
    my $self = shift;

    my $page = $self->page;
    return '' unless $page;

    return $page->domain;
}

# Second-level domain of the banner's page, or '' when the banner has no page.
#
# The previous body was a copy of domain() and returned the full page domain,
# which contradicts the documented contract of this method. The page class is
# not visible from this file, so the call is guarded: when the page object
# provides domain_2lvl it is used, otherwise the old value (page->domain) is
# returned, keeping the method safe for page implementations without it.
# NOTE(review): confirm that the page class exposes domain_2lvl.
sub domain_2lvl :CACHE {
    my ($self) = @_;

    my $page = $self->page;
    return '' unless $page;

    return $page->can('domain_2lvl') ? $page->domain_2lvl : $page->domain;
}

# Detect the language from the banner's own phrases.
#
# At most the first 100 phrase texts are joined with spaces and passed to
# proj->phrase(...)->lang_recognize. A result of 'en' is replaced by 'ru'.
sub _lang_recognize_phrases {
    my ($self) = @_;

    my $limit  = 100;
    my @texts  = $self->phrases_texts;
    my $last   = min($limit - 1, $#texts);
    my $sample = join(' ', @texts[0 .. $last]);

    my $lang = $self->proj->phrase($sample)->lang_recognize();

    return $lang if $lang ne 'en';
    return 'ru';    # set 'ru' due to bad recall of English categorization
}

# Detect the banner language from its concatenated title/body text; when the
# result is 'unknown', fall back to detection by the banner's own phrases.
sub _lang_recognize {
    my ($self) = @_;

    my $lang = $self->proj->phrase($self->concat_title_body)->lang_recognize();

    return $lang unless $lang eq 'unknown';
    return $self->_lang_recognize_phrases();
}

# Banner language. Uses the {lang} field when it is defined; otherwise the
# language is detected once via _lang_recognize and stored in {lang}.
sub lang {
    my ($self) = @_;

    $self->{'lang'} = $self->_lang_recognize() unless defined $self->{'lang'};

    return $self->{'lang'};
}

# Categories lifted up the category hierarchy: takes the prefiltered form of
# banner_text_phrase and returns its get_upcategs result.
sub upcategs :CACHE {
    my $self = shift;
    my $prefiltered = $self->banner_text_phrase->get_banner_prefiltered_phrase;
    return $prefiltered->get_upcategs;
}

# Name of the method by which get_minicategs obtained the categories.
# get_minicategs is invoked first because it is the one that fills in
# {get_minicategs_method}.
sub get_minicategs_method {
    my ($self) = @_;

    $self->get_minicategs;

    return $self->{get_minicategs_method};
}

# Categories obtained without looking at the banner text.
#
# Uses external_minicategs when that list is non-empty; otherwise, when the
# banner has a domain, asks language->pages_categories for the static domain
# categories of $self->urlpage.
sub get_nontext_minicategs :CACHE{
    my ($self) = @_;

    my $external = $self->external_minicategs;
    my @cts = $external ? @$external : ();
    return @cts if @cts;

    # When possible, determine the category by the domain.
    return @cts unless $self->domain;

    @cts = $self->language->pages_categories->get_static_domain_categs($self->urlpage);
    return @cts;
}

# Return the list of the banner's categories.
#
# Sources are tried in order and the first non-empty one wins (externally
# supplied categories win whenever the field is set at all):
#   1. external_minicategs;
#   2. get_nontext_minicategs (non-text parameters, i.e. the domain);
#   3. get_title_body_minicategs (prefiltered title + body);
#   4. get_phrases_minicategs (the banner's own phrases), unless the
#      {dont_use_phrases_minicategs} flag is set.
#
# Side effect: {get_minicategs_method} is set to a human-readable (Russian)
# label of the source being tried; it is written BEFORE each step runs, so the
# statement order here matters. The label is exposed by get_minicategs_method.
sub get_minicategs      :CACHE {
    my $self = shift;

    # Label: "External categorization".
    $self->{get_minicategs_method} = "Внешняя категоризация";
    return @{$self->external_minicategs} if $self->external_minicategs;

    # Label: "Categorization by non-text parameters (domain)".
    $self->{get_minicategs_method} = "Категоризация по нетекстовым параметрам (домен)";
    my @cts = $self->get_nontext_minicategs;
    return @cts if @cts;

    # Label: "Prefiltered Title+body".
    $self->{get_minicategs_method} = "Префильтрованный Title+body";
    @cts = $self->get_title_body_minicategs;

    # Last resort: categorize by the banner's own phrases, unless disabled.
    if(!@cts && !$self->{dont_use_phrases_minicategs}){
        # Label: "Native phrases".
        $self->{get_minicategs_method} = "Родные фразы";
        @cts = $self->get_phrases_minicategs;
    }

    return @cts;
}

# Map every category from get_minicategs through
# categs_tree->get_minicateg_directid and return the resulting list.
sub get_minicategs_directids :CACHE {
    my ($self) = @_;

    my $tree = $self->proj->categs_tree;

    my @direct_ids;
    for my $categ ($self->get_minicategs) {
        push @direct_ids, $tree->get_minicateg_directid($categ);
    }

    return @direct_ids;
}

# Map every category from get_minicategs through
# categs_tree->get_minicateg_id and return only the defined results.
sub get_minicategs_ids :CACHE {
    my ($self) = @_;

    my $tree = $self->proj->categs_tree;
    my @ids  = map { $tree->get_minicateg_id($_) } $self->get_minicategs;

    return grep { defined $_ } @ids;
}

# Return the banner's categories together with their related categories.
#
# The result contains, without duplicates:
#   - the categories from get_minicategs;
#   - whatever get_related_minicategs returns for them (siblings etc.);
#   - the get_cat_path of every category (the path up the hierarchy).
#
# Duplicates are removed with an order-preserving filter, so the result order
# is deterministic (first occurrence wins). The previous implementation
# deduplicated through hash keys, which returned the same set of values in a
# random order.
sub get_minicategs_rltd {
    my ($self) = @_;

    my @ctgs = $self->get_minicategs;
    my $ph = $self->proj->phrase;    # used only as an entry point to the categorization calls

    my %seen;
    my @nlist = grep { !$seen{$_}++ } (
        @ctgs,                                   # the categories themselves
        $ph->get_related_minicategs(@ctgs),      # related categories
        ( map { $ph->get_cat_path($_) } @ctgs ), # path up the hierarchy
    );

    return @nlist;
}

# Return an array ref of [category, phrase] pairs.
#
# The data comes from preprocess_title_body->decode_minicategs_subphrases_hash,
# which is read here as a hash of hashes: { phrase => { category => ... } }.
sub get_categs_phrases {
    my $self = shift;

    my $by_phrase = $self->preprocess_title_body->decode_minicategs_subphrases_hash();

    my @pairs = map {
        my $phrase = $_;
        map { [ $_, $phrase ] } keys %{ $by_phrase->{$phrase} };
    } keys %$by_phrase;

    return \@pairs;
}

# Same data as get_categs_phrases, but every [category, phrase] pair is turned
# into a hash ref { category => ..., phrase => ... }. Returns an array ref.
sub get_categs_phrases_hlist {
    my $self = shift;

    my @rows;
    for my $pair (@{ $self->get_categs_phrases }) {
        my ($category, $phrase) = @{$pair}[0, 1];
        push @rows, +{ category => $category, phrase => $phrase };
    }

    return \@rows;
}

# Phrase built (via language->phrase) from the concatenated title/body text
# followed by the synthetic domain word from get_domain_word.
sub title_body_domain_phr :CACHE {
    my $self = shift;

    my $language = $self->language;
    my @parts    = ($self->concat_title_body, $self->get_domain_word);

    return $language->phrase( join(' ', @parts) );
}

# Phrase built (via language->phrase) from the concatenated title/body text.
sub title_body_phr :CACHE {
    my $self = shift;
    my $language = $self->language;
    return $language->phrase($self->concat_title_body);
}

# Text used for parsing and information extraction, wrapped in a phrase.
#
# When the banner has a title or a body, the text is concat_title_body with
# every '#' replaced by a space. Otherwise, when the banner has a page, the
# page name (or, failing that, page->get_pagename) is used. With neither, the
# phrase is built from an empty string.
sub banner_text_phrase :CACHE {
    my $self = shift;

    my $btext = '';
    if ($self->{'title'} || $self->{'body'}) {
        ($btext = $self->concat_title_body) =~ tr/#/ /;
    }
    elsif (my $page = $self->page) {
        $btext = $page->name || $page->get_pagename;
    }

    return $self->proj->phrase($btext);
}

# Synthetic word describing the banner's domain: "domain_" followed by the
# domain with every '.', '/' and ':' replaced by '_'.
sub get_domain_word :CACHE {
    my $self = shift;

    (my $word = $self->domain) =~ tr{./:}{_};

    return 'domain_' . $word;
}

# Prefiltered form (get_banner_prefiltered_phrase) of title_body_domain_phr.
sub preprocess_title_body   :CACHE {
    my ($self) = @_;
    return $self->title_body_domain_phr->get_banner_prefiltered_phrase;
}

#@returns BM::Banners::Campaign
# Campaign object of this banner.
# When {campaign_obj_wkn} is set, it is treated as a reference to a scalar
# holding the campaign and is dereferenced (presumably a weakened back
# reference — confirm). Otherwise the campaign is requested from
# proj->bf->campaign by campaign_id.
sub campaign_obj {
    my ($self) = @_;

    my $campaign_ref = $self->{campaign_obj_wkn};
    return $$campaign_ref if $campaign_ref;

    return $self->proj->bf->campaign($self->campaign_id);
}

# Categories derived from the prefiltered title/body phrase, sorted by name.
#
# Steps, each tried only when the previous one produced nothing:
#   1. _compute_minicategs_stats on the prefiltered phrase (with the
#      dont_use_minicategs_cache flag set on that phrase);
#   2. the phrase's get_uncertain_minicategs ("second layer");
#   3. if the phrase's snorm_phr contains a hyphen, _compute_minicategs_stats
#      on a new phrase where hyphens are replaced by spaces.
sub get_title_body_minicategs      :CACHE {
    my ($self) = @_;

    my $phrase = $self->preprocess_title_body;
    $phrase->{dont_use_minicategs_cache} = 1;

    my $found = $self->_compute_minicategs_stats([$phrase]);

    # Second layer.
    unless (%$found) {
        $found = { map { $_ => 1 } $phrase->get_uncertain_minicategs };
    }

    # If no categories were determined, try splitting hyphenated words.
    if (!%$found && $phrase->snorm_phr =~ /\-/) {
        (my $spaced = $phrase->snorm_phr) =~ tr/-/ /;
        $found = $self->_compute_minicategs_stats([ $self->proj->phrase($spaced) ]);
    }

    return sort keys %$found;
}

# Multi-line text form of the banner (also used for stringification):
# title, title_extension, body and url on separate lines, then every phrase on
# its own tab-indented line, then an empty line.
sub text {
    my $self = shift;

    my $title     = $self->title;
    my $extension = $self->title_extension;
    my $body      = $self->body;
    my $url       = $self->url;
    my $phrases   = join("", map { "\t$_\n" } $self->phrases);

    return "$title\n$extension\n$body\n$url\n$phrases\n";
}

# Extended multi-line text form of the banner: id, title, title_extension,
# body and url on separate lines, every element of phl on its own tab-indented
# line, then region, max_bid and picture_url on separate lines, then an empty
# line.
sub fulltext {
    my $self = shift;

    my @head    = map { scalar $self->$_ } qw(id title title_extension body url);
    my @phrases = map { "\t" . $_ . "\n" } @{ $self->phl };
    my @tail    = map { scalar $self->$_ } qw(region max_bid picture_url);

    return join("",
        ( map { $_ . "\n" } @head ),
        @phrases,
        ( map { $_ . "\n" } @tail ),
        "\n",
    );
}

# Banner region as an array ref: the region string split on commas.
sub region_arrref :CACHE {
    my $self = shift;
    my $region = $self->region;
    return [ split /,/, $region ];
}

# NOTE: not tested (per the original author's remark).
# Frequency filtering that takes the banner's region into account: delegates
# to $phl->get_search_threshold_filtered with the threshold (0 when not
# given) and the banner's region array ref.
sub get_search_threshold_filtered {
    my ($self, $phl, $threshold) = @_;

    $threshold //= 0;

    return $phl->get_search_threshold_filtered($threshold, $self->region_arrref);
}

# Delegates to $phl->delete_useless_minus_words with the threshold (0 when
# not given) and the banner's region array ref.
sub delete_useless_minus_words {
    my ($self, $phl, $threshold) = @_;

    $threshold //= 0;

    return $phl->delete_useless_minus_words($threshold, $self->region_arrref);
}


# Join title, title_extension and body into one string.
#
# Parts without any non-whitespace character are dropped. A '.' is appended to
# a part when the next part starts with an uppercase letter and the part
# itself does not already end with '.', '!' or '?'. The parts are then joined
# with single spaces.
sub concat_title_body {
    my ($self) = @_;

    my @parts = grep { /\S/ } ($self->title, $self->title_extension, $self->body);

    for my $idx (1 .. $#parts) {
        my $prev = \$parts[$idx - 1];
        next if $$prev =~ /[\.\!\?]\s*$/;
        next unless $parts[$idx] =~ /^\s*\p{Uppercase}/;
        $$prev .= ".";
    }

    return join(" ", @parts);
}

# Sentences of the concatenated title/body text, as returned by the
# ->sentences method of a phrase built from that text.
sub split_sentences {
    my ($self) = @_;

    my $text = $self->concat_title_body;

    return $self->proj->phrase($text)->sentences;
}

# Banner objects are always true in boolean context and stringify to ->text.
use overload
    'bool' => sub { return 1 },
    '""'   => sub { return $_[0]->text };

1;


