package BM::BannersMaker::Tasks::DynTask;

use utf8;
use open ':utf8';

use std;
use base qw(BM::BannersMaker::Tasks::Task);

use Data::Dumper;

use Encode;
use Encode qw{_utf8_off};

use URI::Escape;

use Storable qw(dclone);
use File::Slurp qw(read_file);
use File::Copy qw(move cp);

use POSIX qw(strftime);
use List::Util qw(sum maxstr);

use Utils::CompileTime;
use Utils::Sys qw(
    md5int
    load_json
);
use Utils::Urls qw(
    url_to_domain url_to_punycode
    get_sec_level_domain
    safe_punycode_decode
);
use Utils::Array qw(uniq_array uniq_array_ref_deep);
use Utils::Sys qw(md5int);
use Utils::Hosts qw(
    get_host_role
);
use Utils::Regions qw/get_countries/;

# KEYS (source letters)
# already taken: s,n,a,b,c,d,e and possibly some others

# NOTE(review): presumably the letter assigned to offers with no explicit source; it is not referenced in the visible part of the file - confirm against callers.
use constant default_source_letter => 'n';
# Upper bound on rows with an empty bl_phrases field that merge_tskv_gen copies from a single feed.
use constant max_dummy_banners_per_feed => 100;

########################################################
# Field accessors
########################################################

# No extra accessors are declared for this class (the list is empty).
__PACKAGE__->mk_accessors(qw(
));

########################################################
# Интерфейс
########################################################


########################################################
# Инициализация
########################################################

# File-scoped serialized copy of the task; it is filled in by init().
my $task_json = '';


# Prepares the task description (taskinf) in place:
#   * stores a serialized copy of the task in $task_json;
#   * keeps a deep copy of the incoming Resource.Targets in Resource.DirectTargets;
#   * replaces Resource.Body with the result of make_bs_compatible_or_empty when that result is true;
#   * when Resource.Targets is an array, splits its conditions into
#     Resource.SpecFilters ('not_equals' conditions), Resource.SpecUrls ('equals' conditions)
#     and a hash of regular filters keyed by DynamicConditionID;
#   * sets the banners directory option name and runs check_deprecated_options.
sub init {
    my ($self) = @_;
    my $task = $self->taskinf;
    $task_json = $self->proj->serial([$task]);

    my $targdata = $task->{Resource}{Targets}; # Feed filters
    $task->{Resource}{DirectTargets} = dclone($task->{Resource}{Targets} // []); # Keep the original filter

    if ($task->{Resource}{Body}) {
        # Fall back to the original Body when the helper returns a false value
        $task->{Resource}{Body} = $self->proj->make_bs_compatible_or_empty($task->{Resource}{Body}) || $task->{Resource}{Body};
    }

    # Fix up the Direct format, because the filter representation logic has diverged
    if (1) {
         if( ref($targdata) eq 'ARRAY' ) {
             my $targets = {};
             $task->{Resource}{SpecUrls} ||= [];        # Structure for tasks on goods taken from urls
             $task->{Resource}{SpecFilters} ||= [];     # Structure for filters on goods taken from urls
             for my $cond (@$targdata){
                 next if ( $cond->{Condition} && ref($cond->{Condition}) eq 'HASH' ); # the array form of Condition is used only for special urls, in other cases a hash arrives
                 $_->{type} = 'url' for grep { $_->{type} eq 'URL' } @{$cond->{Condition}}; # Fix the name of the url field
                 # Special filtering case: goods from the url must be excluded
                 if( grep { (defined($_->{kind}) && $_->{kind} eq 'not_equals') } @{$cond->{Condition}} ){ # Special filtering case: goods from the url must be excluded
                     my $spec_cond = [ grep { ($_->{kind} eq 'not_equals') } @{$cond->{Condition}} ]; # Collect the special filters
                     push( @{ $task->{Resource}{SpecFilters} }, [ $cond->{DynamicConditionID}, $spec_cond ] ); # Store the filters in the dedicated structure
                     $cond->{Condition} = [ grep { ($_->{kind} ne 'not_equals') } @{$cond->{Condition}} ]; # Remove these conditions from the regular filter
                 }
                 # Special case: the url arrives as a separate task
                 if( grep { (defined($_->{kind}) && $_->{kind} eq 'equals') } @{$cond->{Condition}} ){ # Special case: the url arrives as a separate task
                     my $spec_urls = [ grep { ($_->{kind} eq 'equals') } @{$cond->{Condition}} ]; # Collect the special urls to crawl
                     my $filter = [ grep { ($_->{kind} ne 'equals') } @{$cond->{Condition}} ];
                     $filter = BM::Filter::ext2bm($filter) || {};
                     push( @{ $task->{Resource}{SpecUrls} }, [ $cond->{DynamicConditionID}, $spec_urls, $filter ] );
                 }else{ # Regular site crawl
                     # Convert the filter to the standard form
                     $targets->{$cond->{DynamicConditionID}} = BM::Filter::ext2bm($cond->{Condition}) || {};
                 }
             }

            # workaround for feedurl that should be removed later
             if ( $task->{Resource}{FeedUrl} ){
                 if((ref($task->{Resource}{Targets}) eq 'ARRAY') && (exists($task->{Resource}{Targets}[0]{DynamicConditionID}))){ # Direct sent the filters in the wrong format again, fix it
                    # Here the conditions are stored as received (minus the 'not_equals' ones removed above), without ext2bm conversion
                    $task->{Resource}{Targets} = { map { $_->{DynamicConditionID} => $_->{Condition} } map {@$_} $task->{Resource}{Targets} };
                  }
             }else{
                 $task->{Resource}{Targets} = $targets;
             }
         }
    }

    $self->{'banners_dirs_param'} = 'dyn_banners_dirs';

    $self->check_deprecated_options();
}

# Returns the fixed banners method name used for this task class: 'dyn_banners'.
sub banners_method_name {
    my $method_name = 'dyn_banners';
    return $method_name;
}

# Returns the 'dyn' entry of the project option 'domains_remove_get_params'.
sub domains_remove_get_params {
    my $self = shift;
    my $options = $self->proj->options;
    return $options->{domains_remove_get_params}->{dyn};
}

# Builds the source letter -> source name map: 'p' and 'm' are fixed,
# and every letter listed by get_external_sources maps to 'external'.
sub _source_letter_2_name_hash :CACHE {
    my ($self) = @_;
    my $name_for = {
        'p' => 'specurls',
        'm' => 'dse',
    };
    $name_for->{ $_->{letter} } = 'external' for @{ $self->get_external_sources };
    return $name_for;
}

# Translates a source letter into a source name.
# Letters missing from _source_letter_2_name_hash are reported as 'offers'.
sub source_letter_2_name {
    my ($self, $letter) = @_;
    my $name = $self->_source_letter_2_name_hash->{$letter};
    return defined $name ? $name : 'offers';
}

# Cache that lives in taskinf rather than in the object, so that the cached data
# can be used later in YT. The '_cache' hash is created on first access.
sub taskinf_cache {
    my ($self) = @_;
    my $taskinf = $self->taskinf;
    $taskinf->{_cache} = {} unless defined $taskinf->{_cache};
    return $taskinf->{_cache};
}

########################################################
# Методы
########################################################

# Returns an array ref with the result of get_countries for the task geo.
# Resource.Geo is a comma-separated string; when it is missing or empty
# an empty array ref is returned and get_countries is not called.
sub get_geo_countries :CACHE {
    my ($self) = @_;
    my $geo_str = $self->taskinf->{Resource}{Geo} || '';
    my @geo = split /,/, $geo_str;

    # An array ref is always true, so the emptiness check has to look at the list itself
    return [] unless @geo;

    return [ get_countries(\@geo) ];
}

# Fetches the list of tasks.
# Kept in this module so that everything is in one place.
# Delegates to the parent implementation and logs before and after the call;
# when no table is passed, get_default_input_tasks_yt_table is used.
sub get_all_tasks_from_yt_table {
    my ($self, $table) = @_;
    $table //= $self->get_default_input_tasks_yt_table();

    my $logger = $self->proj;
    $logger->log("Starting superfunction 'get_all_tasks_from_yt_table'");
    my $all_tasks = $self->SUPER::get_all_tasks_from_yt_table($table);
    $logger->log("Finishing superfunction 'get_all_tasks_from_yt_table'");

    return $all_tasks;
}

# Returns the project option 'dyn_export_offers_dir'.
sub get_export_offers_dir {
    my ($self) = @_;
    my $options = $self->proj->options;
    return $options->{dyn_export_offers_dir};
}
# Returns the project option 'dyn_svn_revision_file'.
sub get_svn_revision_file {
    my ($self) = @_;
    my $options = $self->proj->options;
    return $options->{dyn_svn_revision_file};
}

# Returns a fresh hash ref with the default parameters: { work => 'work' }.
sub get_default_params {
    my %defaults = (work => "work");
    return \%defaults;
}

# Value extracted from the 'ParentExportIDs' list field via get_value_from_list_field.
sub ParentExportID :CACHE {
    my ($self) = @_;
    my $field = 'ParentExportIDs';
    return $self->get_value_from_list_field($field);
}

# Value extracted from the 'TemplateIDs' list field via get_value_from_list_field.
sub TemplateID :CACHE {
    my ($self) = @_;
    my $field = 'TemplateIDs';
    return $self->get_value_from_list_field($field);
}

# See also the analogous PerfTask methods.
# Hashes (md5int) the banner-phrase Title, Body and CanonizedUrl together with
# the task OrderID and ParentExportID, joined with '_'.
sub get_BannerHash {
    my ($self, $bp) = @_;  # $bp is the banner-phrase hash

    my @parts = ($bp->{Title}, $bp->{Body}, $bp->{CanonizedUrl});
    push @parts, $self->taskinf->{OrderID}, $self->ParentExportID;

    return md5int(join('_', @parts));
}

# Builds the banner id: the low 56 bits come from get_BannerHash,
# the high byte is set to the constant 3.
sub get_BannerID {
    my ($self, $bp) = @_;
    my $low_bits_mask = (1 << 56) - 1;
    my $high_byte = 3;
    return ($self->get_BannerHash($bp) & $low_bits_mask) + $high_byte * (1 << 56);
}


# Task domain.
# Resolution order: the externally supplied {extdomain}, then taskinf Domain,
# then Resource.Domain, and finally feed_or_datacamp_domain when the task has a feed url.
sub domain :CACHE {
    my $self = shift;
    if (defined $self->{extdomain}) {
        return $self->{extdomain};
    }

    my $taskinf = $self->taskinf;
    my $domain = $taskinf->{Domain} || $taskinf->{Resource}{Domain};
    if (!$domain && $self->feedurl) {
        $domain = $self->feed_or_datacamp_domain;
    }
    return $domain;
}

# Domain the task domain redirects to, or '' when the fetched page has no location.
# The value is kept in taskinf_cache under 'redirect_domain'.
sub redirect_domain {
    my $self = shift;
    my $cache = $self->taskinf_cache;

    unless (defined $cache->{redirect_domain}) {
        my $page = $self->proj->page($self->domain);
        $page->{no_cache} = 1;
        $page->text;  # the location field is read right after this call

        my $location = $page->{location};
        $cache->{redirect_domain} = $location
            ? $self->proj->page($location)->domain
            : '';
    }

    return $cache->{redirect_domain};
}

# Domain of the page object built from the punycode form of the task domain.
sub punycode_domain :CACHE {
    my $self = shift;
    my $punycode_url = url_to_punycode($self->domain);
    my $page = $self->proj->page($punycode_url);
    return $page->domain;
}

# Builds (and caches in taskinf_cache) the special filters by site sections:
# for every entry of Resource.SpecFilters, each condition url is resolved to a site node
# and the normalized urls of that node's goods are counted under the filter id.
# Returns a hash ref: filter id => { norm_url => count }.
sub specfilters {
    my $self = shift;
    my $task = $self->taskinf;
    my $cache = $self->taskinf_cache;
    return $cache->{specfilters} if $cache->{specfilters};

    # Add the special filters by site sections
    my %urls_of_filter;
    my $specfilters = $task->{Resource}{SpecFilters};
    print STDERR Dumper(['specfilters', $specfilters]);

    for my $entry (@{ $specfilters // [] }) {
        my ($filter_id, $conditions) = @$entry;
        for my $condition (@$conditions) {
            # A condition value is either a single url or an array of urls
            my @condition_urls = ref($condition->{value}) eq "ARRAY"
                ? @{ $condition->{value} }
                : ($condition->{value});

            for my $condition_url (@condition_urls) {
                my $node = $self->site->node($condition_url);
                for my $good (@{ $node->goods }) {
                    $urls_of_filter{$filter_id}{ $good->norm_url }++;
                }
            }
        }
    }

    $cache->{specfilters} = \%urls_of_filter;
    return $cache->{specfilters};
}

# Adjusts an offer url according to the task settings.
#   $url - url to fix
# Returns the url unchanged for the hard-coded test order; otherwise strips utm_* parameters
# (only when Resource.RemoveUtm is 'YES' or 'Yes'), removes the 'from=YM&' market flag
# and passes the result through transf_url_macros.
sub fix_url_by_task {
    my ($self, $url) = @_;
    my $task = $self->taskinf;

    # This order number was hard-coded on request: it is used for test orders.
    # A missing OrderID is treated as 0 so that the numeric comparison never sees undef.
    return $url if ($task->{OrderID} // 0) == 5863096;

    # Remove utm labels only when this is enabled in the task settings.
    # The former condition "$task && (A) || (B)" parsed as "($task && A) || B";
    # the flag is now read once and compared explicitly.
    my $remove_utm = $task->{Resource}{RemoveUtm} || '';
    if ($remove_utm eq 'YES' || $remove_utm eq 'Yes') {
        $url =~ s/utm_(?:[_a-z0-9]+)=[^\&]+\&?//gi; # drop the labels
        $url =~ s/(\?|\&)$//g;                      # drop a dangling '?' or '&'
    }

    $url =~ s/from=YM\&//g; # drop the market flag
    $url = $self->transf_url_macros($url); # expand the macros coming from Direct

    # url_params from $self->taskinf->{Resource}{HrefParams} || $self->get_url_param_from_db were switched off, because they are already appended in BK
    # dmnfix is no longer used!

    return $url;
}

# Site object for the task.
# An externally supplied {ext_site} wins; otherwise the object is created for the task domain,
# passed through check_redir and switched to debug mode with the LWP whitelist enabled.
sub site :CACHE {
    my $self = shift;

    my $external_site = $self->{ext_site};
    return $external_site if defined $external_site;

    # Build the site hierarchy: create the site object, then check for a redirect to another domain
    my $site = $self->proj->site($self->domain)->check_redir;
    $site->{dbg} = 1;
    $site->{enable_lwp_whitelist} = 1;

    return $site;
}

# Returns main_mirror of the task site object.
sub main_mirror :CACHE {
    my $self = shift;
    my $site = $self->site;
    return $site->main_mirror;
}

=h
CREATE TABLE `DynTasks` (
  `TaskID`  varchar(200),
  `OrderID` int(11) NOT NULL,
  `BannerIDs`  varchar(200),
  `ParentExportIDs` varchar(200),
  `cronlight_id` varchar(250),
  `Domain` varchar(200),
  `Geo` varchar(200),
  `Body` varchar(200),
  `firsttime` datetime DEFAULT NULL,
  `lasttime` datetime DEFAULT NULL,
  `begin` datetime DEFAULT NULL,
  `end` datetime DEFAULT NULL,
  `prev_begin` datetime DEFAULT NULL,
  `prev_end` datetime DEFAULT NULL,
  `errors` text,
  `taskinf` text,
  `resinf` text,
  `srccount` int(11) NOT NULL,  # не используется
  `offerscount` int(11) NOT NULL,
  `resultcount` int(11) NOT NULL,
  `currentstatus` varchar(200),
  `stagelog` text,
  `stat` text,
  PRIMARY KEY (`TaskID`)
)
=cut
# Name of the tasks table: 'DynTasks'.
sub tablename :CACHE {
    my $self = shift;
    my $table = 'DynTasks';
    return $table;
}

# Name of the domains table: 'DynDomains'.
sub domain_tablename :CACHE {
    my $self = shift;
    my $table = 'DynDomains';
    return $table;
}

# Hook executed before the task: writes the BEGIN line to the global log and
# the "begin domain" marker to the project log both before and after the parent hook
# (the parent hook switches the logs).
sub action_before_task {
    my $self = shift;
    my $logger = $self->proj;

    my $begin_line = "BEGIN TaskID=".$self->task_id." domain=".$self->domain;
    $self->globallog($begin_line);

    my $marker = "=== begin domain ".$self->domain." ===";
    $logger->log($marker);
    $self->SUPER::action_before_task; # the logs are switched here
    $logger->log($marker);
}

# Hook executed after the task: writes the "end domain" marker to the project log both
# before and after the parent hook (the parent hook switches the logs),
# then writes the END line with the result statistics to the global log.
sub action_after_task {
    my $self = shift;
    my $logger = $self->proj;

    my $marker = "=== end domain ".$self->domain." ===";
    $logger->log($marker);
    $self->SUPER::action_after_task; # the logs are switched here
    $logger->log($marker);

    my $end_line = "END TaskID=".$self->task_id." domain=".$self->domain." stat=".($self->{result_stat} || '');
    $self->globallog($end_line);
}


# Dumps debugging information about the task site into the "siteinfo" file.
# Nothing is done when an external tskv_gen is supplied ({ext_tskv_gen}).
# The data is written to "<siteinfo>_tmp" and then handed to _tmp2file.
# Dies when the temporary file cannot be opened for writing.
sub get_debug_site_info {
    my ($self) = @_;

    return if $self->{ext_tskv_gen};

    my $fileinfo = $self->fileformat("siteinfo");

    my $s = $self->site;
    my $nd = $s->mainnode; # main node of the site

    # Print the debug information about the site
    my $tmp_fileinfo = $fileinfo."_tmp";
    # Three-argument open with an explicit check: previously a failed open went unnoticed
    # and every print below was silently lost.
    open(my $fhi, '>', $tmp_fileinfo)
        or die "Can't open $tmp_fileinfo for writing: $!";
    $s->{fh} = $fhi;

    print $fhi "dmn: ".$self->domain."\n";
    print $fhi "mainpage: ".$nd->pg."\n";
    print $fhi "mainpage: ".$nd->pg."\n";
    print $fhi "taskinf: ".Dumper($self->taskinf)."\n";
    print $fhi "filters: ".Dumper($self->filters)."\n";
    print $fhi "\n=== menu ===\n";
    $s->menu_pages->debug_print("menupages", $fhi);
    print $fhi "=== /menu ===\n\n";
    print $fhi "\n=== mainpage subsections ===\n";
    $nd->subsections->debug_print("subsections", $fhi);
    print $fhi "=== / mainpage subsections ===\n\n";
    print $fhi "\n=== mainpage not_subsections ===\n";
    $nd->not_subsections->debug_print("not_subsections", $fhi);
    print $fhi "=== / mainpage not_subsections ===\n\n";
    print $fhi "\n=== mainpage filter_metalinks_debug ===\n";
    print $fhi $nd->pg->get_internal_subpages->filter_metalinks_debug;
    print $fhi "=== / mainpage filter_metalinks_debug ===\n\n";
    print $fhi "\n=== mainpage nodes ===\n";
    $nd->ndl->debug_print("mpnodes", $fhi);
    print $fhi "=== / mainpage nodes ===\n\n";
    print $fhi "\n=== freq_tmpl_stat ===\n";
    print $fhi Dumper($s->freq_tmpl_stat);
    print $fhi "=== / freq_tmpl_stat ===\n\n";
    print $fhi "\n=== freq_url_texts ===\n";
    print $fhi Dumper($s->freq_url_texts);
    print $fhi "=== / freq_url_texts ===\n\n";
    print $fhi "\n=== mainpage good_internal_subpages ===\n";
    $nd->pg->good_internal_subpages->debug_print("gis", $fhi);
    print $fhi "=== / mainpage good_internal_subpages ===\n\n";
    print $fhi "\n=== freqhash_suburls ===\n";
    print $fhi Dumper($s->mainpage->get_freqhash_suburls);
    print $fhi "=== / freqhash_suburls ===\n\n";
    #print $fhi "\n=== freqhash_suburls src ===\n";
    #print $fhi Dumper($s->mainpage->get_internal_subpages->filter_metalinks->get_frequenti_all_suburls_arr);
    #print $fhi "=== / freqhash_suburls src ===\n\n";

    # Metalinks are the internal subpages that filter_metalinks removes
    print $fhi "\n=== mainpage metalinks ===\n";
    my $pgl = $nd->pg->get_internal_subpages;
    my $fpgl = $pgl->filter_metalinks;
    my $mpgl = $pgl - $fpgl;
    $mpgl->debug_print("mpmetalinks", $fhi);
    print $fhi "=== / mainpage metalinks ===\n\n";

    print $fhi "\nspecial_goods_url_filter_re='".$s->special_goods_url_filter_re."'\n\n";

    close($fhi);
    $self->_tmp2file($fileinfo);
    # /Print the debug information about the site
}

# Crawls the special urls of the task (Resource.SpecUrls) and turns the results into feeds.
#   %par: ret_inf => true  - return the raw feed descriptions instead of feed objects
# Every crawled tskv chunk is also appended to "<specurls>_tmp".
# Crawling stops (and the 'too_long_task' debug flag is set) once it has been running
# for more than 20 hours.
# Returns a list of feed descriptions (ret_inf) or of objects built by $proj->feed.
# Dies when the temporary file cannot be opened for writing.
sub specurls_offers_fd {
    my $self = shift;
    my %par  = @_;

    my $proj = $self->proj;
    my $task = $self->taskinf;
    my $srurls = $task->{Resource}{SpecUrls};
    my $filespecurls = $self->fileformat("specurls");
    my @inf;

    # Lexical handle and three-argument open with a check instead of the bareword
    # handle F, which was global and whose open result was ignored.
    my $tmp_filespecurls = "${filespecurls}_tmp";
    open(my $specurls_fh, '>', $tmp_filespecurls)
        or die "Can't open $tmp_filespecurls for writing: $!";

    if(defined($srurls)&&(@$srurls)){
        $proj->log("specurls count: ".@$srurls);
        my $begin_time = time;
        OUTER:
        for my $suh ( @$srurls ){ # list of filters with urls
            my ($fltid, $list, $fltrs) = @$suh;
            for my $ft ( @$list ){  # list of urls of a particular filter
                my @urls;

                if(ref($ft->{value}) eq "ARRAY") {
                    $proj->log("ft->{value} is ARRAY!");
                    @urls = @{ $ft->{value} };
                } else {
                    @urls = ( $ft->{value} );
                }

                for my $url (@urls) {
                    if (time - $begin_time > 20 * 3600) {
                        $self->log("Too long specurls");
                        $self->set_debug_info("too_long_task", 1);
                        last OUTER;
                    }
                    $proj->log("start creating feed for specurl $url");
                    # crawl the site starting from the special url
                    my $data = $self->site->get_url_tskv_feed($url);
                    print {$specurls_fh} $data;
                    # NOTE: duplicates by merge_key are not removed
                    push @inf, {
                        data => $data,
                        datatype => 'offers_tskv',
                        filters => { $fltid => $fltrs },
                        source_letter => 'p',
                        offer_source => 'specurl',
                    };
                }
            }
        }
    }
    close($specurls_fh);
    return @inf if $par{ret_inf};

    my @arr = map { $proj->feed($_) } @inf;
    $self->proj->log("specurls_offers_fd feeds ".@arr);
    return @arr;
}

# Hash ref with the file names used by the task, keyed by file kind.
# 'tskv_gen' is overridden by {ext_tskv_gen} when it is set;
# 'sitetskv_pkd' is the 'sitetskv' name with the '_pkd' suffix.
sub filenames :CACHE {
    my $self = shift;

    my %path_of = map { ($_ => scalar $self->fileformat($_)) }
        qw{sitetree sitetskv filterinf taskjson feed feedtskv tskv_gen};

    if ($self->{ext_tskv_gen}) {
        $path_of{tskv_gen} = $self->{ext_tskv_gen};
    }
    $path_of{sitetskv_pkd} = $path_of{sitetskv}."_pkd";

    return \%path_of;
}

# Looks in $dir for a file named after the task domain.
# Candidates are tried in order: the domain, 'www.' + domain, the punycode domain.
# Returns the path of the first existing file, or undef when none exists.
sub get_domain_file {
    my ($self, $dir) = @_;

    my @candidates = ( $self->domain, 'www.'.$self->domain, $self->punycode_domain );
    for my $name (@candidates) {
        my $path = "$dir/$name";
        return $path if -e $path;
    }

    return undef;
}

# Merges the available offer sources into the tskv_gen file.
# Sources: the packed crawler feed (letter 'n', when the file exists) and the dse feed
# (downloaded via download_source_file; when that fails the last local dse file is copied).
# Every copied row is prefixed with offer_source/source_letter (and product_type for dse).
# Rows are skipped when they belong to another domain or when the per-feed limit of rows
# with an empty bl_phrases field is exceeded. The result goes through _tmp2file,
# process_merge_keys and shuf.
# Dies when the temporary output file cannot be opened; an unreadable input feed is
# logged and skipped.
sub merge_tskv_gen {
    my ($self) = @_;

    my $proj = $self->proj;

    $proj->log("Start merge tskv");

    my $feeds = [];

    my $crawler_feed_filename = $self->filenames->{sitetskv_pkd};
    if (-e $crawler_feed_filename) {
        $proj->log("crawler feed exists");
        push @$feeds, [ $crawler_feed_filename, 'crawler', 'n' ];
    }

    my $dse_feed_filename = $self->fileformat('dse');
    my $dse_options = $proj->options->{DynSources}->{dse};

    unless ($self->download_source_file(
        $dse_options->{yt_path_domain},
        $dse_feed_filename,
        $self->get_source_key($self->domain),
    )) {
        my $last_file = $self->find_last_format_file('dse');
        if ($last_file) {
            $proj->log("Download dse failed; copy last dse file from: $last_file");
            # A failed copy is reported; the -f/-s check below decides whether dse is used
            cp($last_file, $dse_feed_filename)
                or $proj->log("WARNING: can't copy $last_file to $dse_feed_filename: $!");
        }
    }
    if (-f $dse_feed_filename && -s $dse_feed_filename) {
        $proj->log("dse feed exists");
        push @$feeds, [ $dse_feed_filename, 'dse', $dse_options->{letter}, "product_type=dse\t" ];
    }

    my $merge_feed_filename = $self->filenames->{tskv_gen};
    my $merge_feed_tmp = "${merge_feed_filename}_tmp";
    open(my $merge_feed, '>', $merge_feed_tmp)
        or die "Can't open $merge_feed_tmp for writing: $!";

    # The task domain does not change between rows, so its punycode form is computed once
    my $task_domain = url_to_punycode(url_to_domain($self->domain));

    my $count_all = 0;
    for my $feed_spec (@$feeds) {
        my ($feed_filename, $offer_source, $letter, $additional) = @$feed_spec;
        $additional //= '';
        $proj->log("feed $feed_filename start");

        my $feed;
        unless (open($feed, '<', $feed_filename)) {
            # Previously an unreadable feed produced no rows without any trace in the log
            $proj->log("WARNING: can't open feed $feed_filename: $!");
            next;
        }

        my $c = 0;
        my $dummy_banners_count = 0;
        while (defined(my $row = <$feed>)) {
            if ($row =~ /(?:^|\t)url=([^\t]+)(?:\n|\t|$)/) {
                my $url = $1;
                # Rows with an empty bl_phrases field are limited per feed
                if ($row =~ /(?:^|\t)bl_phrases=(?:\t|\n|$)/) {
                    if (++$dummy_banners_count > max_dummy_banners_per_feed()) {
                        next;
                    }
                }
                # Keep only the offers of the task domain
                if (url_to_punycode(url_to_domain($url)) ne $task_domain) {
                    next;
                }
                print {$merge_feed} "${additional}offer_source=$offer_source\tsource_letter=$letter\t$row";
                $c += 1;
                # Source 'v' is limited to 5% of the rows copied from the previous feeds
                if ($letter eq 'v' && $c > $count_all * 0.05) {
                    $proj->log("links source limited: $c / $count_all");
                    last;
                }
                # Source 'l' is limited to 1% of the rows copied so far, but not less than 10 rows
                my $m = $count_all * 0.01;
                $m = 10 if $m < 10;
                if ($letter eq 'l' && $c > $m) {
                    $proj->log("dse exp source limited: $c / $count_all");
                    last;
                }
            } else {
                $proj->log("WARNING: offer without url");
            }
        }
        close($feed);
        $count_all += $c;
    }

    close($merge_feed);
    $self->_tmp2file($merge_feed_filename);
    $self->process_merge_keys($merge_feed_filename, $merge_feed_filename);
    $proj->file($merge_feed_filename)->shuf(0);

    $proj->log("End merge tskv");
}

# Returns 1 if the tskv was created successfully.
# Returns 0 (after recording zero sizes in the domain params) when the task has no feed url
# and the domain is in the bad-domain lists.
# The tskv_gen files are either downloaded (Resource.__BL_FILES__.TskvGen) or, unless an
# external tskv_gen is supplied, produced by crawling the site, packing the result and
# calling merge_tskv_gen. Dies when a file cannot be downloaded or created.
sub renew_tskv {
    my ($self) = @_;

    # DYNSMART-195: filter out bad domains such as vk.com, yandex.ru, they are specified by mistake
    if ((! $self->feedurl) && $self->is_bad_domain) {
        $self->proj->log("Bad domain");
        $self->set_debug_info("bad_domain", 1);
        $self->set_domain_params({
            tskv_time => $self->curtime,
            tskv_size => 0,
            tree_size => 0,
        });
        return 0;
    }

    my $s = $self->site;
    $s->timeout(60);
    # Remember the current zora flags; they are restored right before the final return
    my ($prevz, $prevzf) = ($s->{zora}, $s->{enable_zora_fast});
    $s->{zora} = 1;
    $s->{enable_zora_fast} = 1;

    my $begtime = time;

    # Download the tskv_gen files
    if ($self->taskinf->{Resource}{__BL_FILES__}{TskvGen}) {
        my $tskv_gen_info = $self->taskinf->{Resource}{__BL_FILES__}{TskvGen};
        my @tskv_gen_files = qw(sitetskv_pkd tskv_gen sitetree);
        for my $file_name (@tskv_gen_files) {
            my $url = $tskv_gen_info->{$file_name};
            my $file_path = $self->filenames->{$file_name};
            my $load_status = $self->proj->load_file_by_url($url, $file_path, timeout => 3600);
            die("Failed to load $url") unless $load_status;
        }
    }
    elsif(! $self->{ext_tskv_gen}){
        # workaround to switch off the native generation for booking and some other large clients
        # it should be replaced with the mechanism being developed in DYNSMART-635
        if ( !$self->proj->options->{dyn_banners_dse_only}->{$self->site->domain_2lvl} ) {
            # Print the debug information about the site into a separate file
            $self->get_debug_site_info;

            $self->stagelog("=== end info ".$self->domain." === ".$self->curtime." ===");
            # Print the tree of sections
            $self->data2file( $self->filenames->{sitetree}, sub {
                my ($self, $fh) = @_;
                print $fh $self->site->gen_site_tree;
            });
            # /Print the tree of sections

            $self->stagelog("=== end tree ".$self->domain." === ".$self->curtime." ===");

            # Get the feed of models in the tskv format
            $self->site->get_site_tskv_feed_file($self->filenames->{sitetskv}, $begtime);
            $self->set_debug_info("timeout", 1) if $self->site->{tskv_timeout};
            # /Get the feed of models in the tskv format
        }
        else {
            $self->stagelog("=== skip crawler ".$self->domain." === ".$self->curtime." ===");
            # create empty tree and feed files
            foreach my $filename ( ($self->filenames->{sitetskv}, $self->filenames->{sitetree}) ) {
                my $fh;
                open $fh, '>', $filename and close $fh
                or die "Failed to create $filename: $!\n";
            }
        }

        $self->stagelog("=== end tskv ".$self->domain." === ".$self->curtime." ===");

        # Collapse the repeated texts
        my $packcmd = "cat ".$self->filenames->{sitetskv}." | sort -t\$'\\t' -k3,3 -k1,1r | sort -t\$'\\t' -k3,3 -u > ".$self->filenames->{sitetskv_pkd}; # field 3 is the text, field 1 is the category hierarchy
        $self->proj->do_sys_cmd($packcmd);
        $self->stagelog("=== end packtskv ".$self->domain." === ".$self->curtime." ===");

        $self->merge_tskv_gen;
    }
    if ($self->taskinf->{Resource}{__BL_FILES__}{TskvGen} || !$self->{ext_tskv_gen}) {
        # Collect the information for diagnosing problems
        my $tskv_size = $self->proj->file($self->filenames->{sitetskv_pkd})->wc_l;
        my $tskv_gen_size = $self->proj->file($self->filenames->{tskv_gen})->wc_l;
        my $tree_size = $self->proj->file($self->filenames->{sitetree})->wc_l;
        $self->set_debug_info('tree_size', $tree_size);
        $self->set_debug_info('tskv_size', $tskv_size);
        $self->set_debug_info('tskv_gen_size', $tskv_gen_size);
    }
    ($s->{zora}, $s->{enable_zora_fast}) = ($prevz, $prevzf);
    return 1;
}

# Stores $value under $key in the {debug_info} hash.
# Does nothing when {debug_info} has not been set up on the object.
sub set_debug_info {
    my ($self, $key, $value) = @_;

    my $debug_info = $self->{debug_info};
    if ($debug_info) {
        $debug_info->{$key} = $value;
    }
}

# Returns the {debug_info} hash of the object as is (undef when it was never set).
sub get_debug_info_hash {
    my $self = shift;
    my $debug_info = $self->{debug_info};
    return $debug_info;
}

# Builds the source of a regular expression (alternatives joined with '|') that matches
# the domains of the task: the task domain, the redirect domain (when it is set and differs)
# and the punycode domain (when it differs).
# Each domain is lower-cased, stripped of the scheme, of a trailing slash and of a leading
# 'www.', and then escaped so that it is matched literally.
# Returns the pattern as a string.
sub get_domains_regexp :CACHE {
    my $self = shift;
    my @domains = ($self->domain);

    if($self->redirect_domain && $self->redirect_domain ne $self->domain) {
        push @domains, $self->redirect_domain;
    }

    if($self->punycode_domain ne $self->domain) {
        push @domains, $self->punycode_domain;
    }

    my @domain_res;
    for my $domain (@domains) {
        my $domain_re = lc($domain);
        $domain_re =~ s/^https?:\/\///;
        $domain_re =~ s/\/$//;
        # Only a leading 'www.' is a prefix to drop; an unanchored match would also
        # cut 'www.' out of the middle of a host name
        $domain_re =~ s/^www\.//;
        # Escape regex metacharacters: an unescaped '.' matches any character,
        # so 'a.ru' would also accept 'axru'
        push @domain_res, quotemeta($domain_re);
    }

    my $domains_re = join("|", @domain_res);
    $self->log("domain filter regex: /$domains_re/");

    return $domains_re;
}

# Returns 1 when the punycode form of the offer url matches the task domains regexp
# (see get_domains_regexp), otherwise 0.
#   $pt - object that provides the offer url via ->url
sub check_offer_domain {
    my ($self, $pt) = @_;

    my $domains_re = $self->get_domains_regexp;
    my $offer_url = url_to_punycode($pt->url);

    return 1 if $offer_url =~ /$domains_re/;
    return 0;
}

# Reuses the results of the task named in {copy_from} instead of generating new ones.
# The source directory is the task directory with the task id replaced by the id of the
# source task; the lexicographically greatest '*_export_offers' entry is passed to
# do_export_offers as copy_from.
# Returns 1 when such an entry was found and handed to do_export_offers, 0 otherwise
# (FunnelInfo is then set to explain why).
sub worker_copy {
    my ($self) = @_;

    my $proj = $self->proj;
    my $funnel_msg = "copying_last_generation_results\n";

    my $from_tid = $self->{copy_from};
    my $tid = $self->task_id;
    my $dir = $self->dir;
    my $from_dir = $dir;
    # The task id is data, not a pattern: quote it so that characters such as '.'
    # are matched literally
    $from_dir =~ s/\Q$tid\E/$from_tid/;

    $proj->log("copy tao from $from_dir to $dir");

    # An unreadable source directory is treated as a directory without export files
    my @files;
    if (opendir(my $dh, $from_dir)) {
        @files = readdir $dh;
        closedir $dh;
    }
    else {
        $proj->log("can't open directory $from_dir: $!");
    }

    my $last_eo = maxstr(grep { /_export_offers$/ } @files);
    if (!$last_eo) {
        $self->set_params({ FunnelInfo => $funnel_msg."no_last_export_offers_files", });
        return 0;
    }

    my $eo_res = $self->do_export_offers(copy_from => "$from_dir/$last_eo") // {};
    if ($eo_res->{export_info}) {
        $self->set_params($self->get_export_offers_resparams($eo_res->{export_info}));
    }
    return 1;
}

# Returns a JSON-formatted string with the task OrderID and the list of ParentExportIDs.
sub get_direct2bsdata :CACHE {
    my $self = shift;
    my $task = $self->taskinf;
    my $parent_ids = join(',', @{$task->{ParentExportIDs} || []} );
    return '{"OrderID": '.$task->{OrderID}.', "ParentExportIDs": [ '.$parent_ids.' ]}';
}

# Checksum of a url: the low 32 bits of md5int($url), shifted right by one bit.
# The url is taken from the first argument.
sub get_checksum {
    my $url = shift;
    my $low32 = md5int($url) & 0xffffffff;
    return ($low32 >> 1);
}

# Loads the 'dyn_bad_domains' dictionary from the dicts directory of the project.
# Returns a hash ref: one key per line of the file, every value is 1.
# Dies when the file cannot be opened.
sub bad_domains :GLOBALCACHE {
    my ($self) = @_;

    my $dict_path = $self->proj->options->{dicts} . '/dyn_bad_domains';
    open my $fh, '<', $dict_path
        or die "Can't open bad_domains: $!";

    my %is_bad;
    while (defined(my $line = <$fh>)) {
        chomp $line;
        $is_bad{$line} = 1;
    }
    close $fh;

    return \%is_bad;
}

# Loads the 'dyn_bad_domains_2level' dictionary from the dicts directory of the project.
# Returns a hash ref: one key per line of the file, every value is 1.
# Dies when the file cannot be opened.
sub bad_domains_2level :GLOBALCACHE {
    my ($self) = @_;

    my $dict_path = $self->proj->options->{dicts} . '/dyn_bad_domains_2level';
    open my $fh, '<', $dict_path
        or die "Can't open bad_domains_2level: $!";

    my %is_bad;
    while (defined(my $line = <$fh>)) {
        chomp $line;
        $is_bad{$line} = 1;
    }
    close $fh;

    return \%is_bad;
}

# Returns 1 when the task domain is listed in bad_domains, or when its second-level
# domain is listed in bad_domains_2level; otherwise 0.
sub is_bad_domain {
    my $self = shift;
    my $domain = url_to_domain($self->domain);

    if ($self->bad_domains->{$domain}) {
        return 1;
    }

    my $bad_2level = $self->bad_domains_2level;
    return $bad_2level->{get_sec_level_domain($domain)} ? 1 : 0;
}

# Prepares the feeds for processing.
# When the task has filters, the main offers feed is built either from the feed url
# (prepare_feeddata + get_feed_by_tskv_mpd) or from the tskv_gen file produced by renew_tskv.
# Feeds built from the special urls are always appended.
# Returns an array ref of feed objects, or nothing (bare return) when the main feed cannot
# be produced; in that case FunnelInfo is set to the reason.
sub get_feeddata {
    my $self = shift;

    my $proj = $self->proj;

    my @main_feeds;  # the "main sources"

    my $feedurl = $self->feedurl;
    my $filters = $self->filters;
    my $file_tskv_gen = $self->filenames->{"tskv_gen"};
    my $dmn = $self->domain;

    if(keys %$filters){
        my $offers_feed;
        $proj->log("worker filters count ".(keys %$filters));

        unless ($feedurl) {
            unless ($self->renew_tskv) {
                $proj->log("Renew tskv with error. End task.");
                $self->set_params({ FunnelInfo => "task_skipped_tskv_renewed_with_error", });
                return;
            }
        }

        # the feed arrived as a url
        if ( $feedurl ){
            $proj->log("Create feed by url: $feedurl");
            $self->prepare_feeddata();
            my $filetskv_mpd_feeddata = $self->load_feed_info->{tskv_mpd};

            # create a new feed from the tskv with the filters applied
            $proj->log("creating feed by file $filetskv_mpd_feeddata");
            $offers_feed = $self->get_feed_by_tskv_mpd($filetskv_mpd_feeddata);

            $self->set_params({ Domain => $self->feed_or_datacamp_domain });
            $self->_save_feedtskv_files( $offers_feed );
            # Prefer the domain of the first product when the feed provides one
            my $domain = $offers_feed->get_first_product_domain;
            if ($domain) {
                $self->set_params({ Domain => $domain });
            }

            if ($offers_feed->is_empty) {
                $proj->log("ERROR: feed is empty");
                $self->set_params({ FunnelInfo => "ERROR: feed is empty", });
                return;
            }
        } else {
            # take the data from the crawler
            if (-e $file_tskv_gen) {
                # Build the feed object
                $proj->log("Load tskv_gen file: $file_tskv_gen beg");
                $proj->log("Filters: ".Dumper($filters));
                $offers_feed = $proj->feed({
                    extfile => $file_tskv_gen,
                    datatype => 'offers_tskv',
                    filters => $filters,
                    offer_source => 'tskv_gen',  # fallback
                } );
                $proj->log("Load tskv_gen file: $file_tskv_gen end");
            } else {
                my $msg = "=== bad tskv file for $dmn === file: $file_tskv_gen";
                $self->stagelog($msg);
                print STDERR "$msg\n";
            }

            # A missing tskv_gen file leaves $offers_feed undefined; treat it like an empty
            # feed instead of dying on a method call on undef
            if (!$offers_feed || $offers_feed->is_empty) {
                $proj->log("ERROR: tskv_gen (crawler + dse) is empty");
                $self->set_params({ FunnelInfo => "ERROR: tskv_gen (crawler + dse) is empty", });
                return;
            }

        }

        my $feed_data_type = $offers_feed->fds->{feed_data_type};
        if ($feed_data_type){
            $self->stagelog("=== end offers $dmn === offers=".$offers_feed->get_total_offers_count);
            push @main_feeds, $offers_feed;
        } else {
            $proj->log("ERROR: can't define feed_data_type for offers feed");
            $self->set_params({ FunnelInfo => "ERROR: can't define feed_data_type", });
            return;
        }
    }else{
        $self->stagelog("=== no site filters $dmn ===");
    }

    # Add the data produced from the special urls
    my @specurls_fds = $self->specurls_offers_fd;
    $self->stagelog("=== end specurls offers $dmn ===");
    push @main_feeds, @specurls_fds;

    return \@main_feeds;
}

# Feed parameters of the parent class with source_letter forced to 'o'.
sub get_feedparams {
    my $self = shift;
    my $params = $self->SUPER::get_feedparams;
    $params->{source_letter} = 'o';
    return $params;
}

# Entry point of the task processing: prepare, optionally copy the results of another
# task, otherwise generate and finalize.
sub worker {
    my $self = shift;

    $self->check_deprecated_options;

    unless ($self->worker_prepare) {
        $self->log("nothing to do!");
        return;
    }

    # when all fields of the task except body coincide, export_offers is copied
    return if $self->{copy_from} && $self->worker_copy;

    my $result = $self->worker_generate;
    unless ($result) {
        $self->log("can't generate result");
        return;
    }

    $self->worker_finalize($result);
}

# Prepares a task run: caches task data, enforces the minimal interval between
# runs, stores the run parameters and validates the task state.
#
# Returns 1 when generation should go on; returns nothing when the task has to
# be skipped (started too recently, body is not BS-compatible, task is stopped
# in the interface, or task is blocked).
sub worker_prepare {
    my ($self) = @_;
    my $proj = $self->proj;

    my $begin_time = $self->{_begin_time} = time;

    # cache some data for usage in YT
    $self->redirect_domain;
    $self->specfilters;

    $self->clear_stagelog;
    @{$self}{qw(FalseStart result_stat)} = (0, '');

    $proj->log(Utils::CompileTime::status_str());

    # a task is processed at most once per cycle: 24 hours, unless the domain
    # has its own cycle time in the options
    my $cycle_time = $proj->options->{dyn_custom_cycle_time_domains}->{$self->domain} // 24 * 3600;
    $proj->log("cycle_time = $cycle_time");

    my $last_begin = $self->get_param("begin");
    if ($last_begin and !$ENV{BANNERLAND_FORCE_RUN_GENERATION}) {
        my $last_begin_time = $proj->dates->trdate('db_time', 'sec', $last_begin);
        my $elapsed = time() - $last_begin_time;
        if ($elapsed < $cycle_time) {
            $self->{FalseStart} = 1;
            $self->stagelog("=== last_begin='$last_begin', do nothing ===");
            return;
        }
    }

    # time between the end of the previous run and the start of this one
    my $prev_end = $self->get_param('end');
    if ($prev_end && $prev_end ne '0000-00-00 00:00:00') {
        my $prev_end_time = $proj->dates->trdate('db_time', 'sec', $prev_end);
        $self->set_debug_info("wait_time", $begin_time - $prev_end_time);
    }

    my %run_params = (
        cronlight_id  => $proj->{cronlight_id} || '',
        begin         => $self->curtime,
        end           => '',
        crontime      => $proj->{cronlight_crontime} || '',
        prev_begin    => $self->get_param('begin'),
        prev_end      => $prev_end,
        prev_crontime => $self->get_param('crontime'),
        Domain        => $self->domain,
        Geo           => $self->taskinf->{Resource}{Geo},
        Body          => $self->taskinf->{Resource}{Body},
        FeedUrl       => $self->feedurl,

        # stop-gap: lets the field get refreshed for tasks already present in the table
        GroupExportIDs  => join(' ', sort @{ $self->taskinf->{GroupExportIDs}  || [] }),

        TargetsJson       => $proj->json_obj->encode($self->taskinf->{Resource}{Targets}),
        DirectTargetsJson => $proj->json_obj->encode($self->taskinf->{Resource}{DirectTargets}),

        host            => $proj->host_info->{host},
        domain_dir_name => $self->filedmn,
    );
    $self->set_params(\%run_params);

    # body validation: currently the only check is BS compatibility of the text
    my $body_check_error;
    $body_check_error = 'charset'
        if !$self->proj->is_bs_compatible($self->taskinf->{Resource}{Body});

    if ($body_check_error) {
        my %message_for = (
            charset => 'body has incompatible characters, Exit!',
        );
        my $log_msg = $message_for{$body_check_error} // 'unknown body check error, Exit!';

        $self->log($log_msg);
        $self->stagelog($log_msg);
        $self->set_params({
            end           => $self->curtime,
            offerscount   => 0,
            resultcount   => 0,
            stagelog      => $self->get_stagelog,
            FunnelInfo    => $log_msg,
        });
        return;
    }

    # the task is switched off in the interface: export an empty feed and stop
    unless ($self->get_param('work') eq 'work') {
        my $funnel = "task_skipped_stopped_in_the_interface";
        $self->set_params({ FunnelInfo => $funnel });
        $self->export_offers_from_empty_feed(funnel => {$funnel => 1});
        $self->stagelog("=== the task (".$self->task_id.") was stopped in the interface ('".$self->get_param('work')."') ===");
        return;
    }

    # tasks from the block list are not processed
    if ($self->is_blocked_task) {
        $self->stagelog("=== task_id was blocked, do nothing ===");
        $self->set_params({ FunnelInfo => "task_blocked", });
        return;
    }

    return 1;
}

# Runs the generation stage of a prepared task.
#
# Steps: moderate the domain (best effort), dump the filter info and the task
# JSON to files, collect the feeds through get_feeddata, fill
# use_as_name / use_as_body for every feed and pass the feeds to
# do_export_offers.
#
# Options (%opts):
#   export_offers_par => hashref with extra parameters merged into the
#                        do_export_offers call
#
# Returns the do_export_offers result ({} when it is undefined), or nothing
# when get_feeddata reports an error.
sub worker_generate {
    my $self = shift;
    my %opts = @_;

    my $proj = $self->proj;

    $self->check_deprecated_options;

    my $begin_time = $self->{_begin_time} // time;

    my $main_mirror = $self->main_mirror;

    $self->extlog(Dumper($self->taskinf));

    my $dmn = $self->domain;

    my $filefltinf   = $self->filenames->{"filterinf"};
    my $filetaskjson = $self->filenames->{"taskjson"};

    my $s = $self->site;

    # Domain moderation is best effort. The original note mentions crashes like
    # 'http://direct-modback.yandex.ru:3200/performanceModerate': 500 that have
    # to be ignored, so a failure is logged and the task goes on.
    my $moderation_done = eval {
        unless ($s->moderate_domain) {
            # domain did not pass moderation: notify by mail and log
            my $mail_data = {
                from      => 'skreling@yandex-team.ru',
                mail_list => "bannerland_moderation",
                subject   => 'Bad domain (moderation) ' . $s->domain,
                body      => "RES: " . $s->{moderate_log},
            };
            $proj->SendMail($mail_data);
            $proj->log("Bad domain (moderation) => " . $s->{moderate_log});
        } else {
            $proj->log(" moderate_domain - OK ");
        }
        1;
    };
    $proj->log("moderate_domain failed (ignored): $@") if !$moderation_done;

    # Writes the chunks to "<filename>_tmp" and publishes the result through
    # _tmp2file. A failed open/close is logged and the target file is left
    # untouched, instead of printing to an unopened handle and publishing a
    # stale or missing tmp file.
    my $write_via_tmp = sub {
        my ($filename, @chunks) = @_;
        my $tmp_name = $filename . "_tmp";

        my $fh;
        if (!open($fh, '>', $tmp_name)) {
            $proj->log("ERROR: can't open '$tmp_name' for writing: $!");
            return;
        }
        print {$fh} $_ for @chunks;
        if (!close($fh)) {
            $proj->log("ERROR: can't close '$tmp_name': $!");
            return;
        }

        $self->_tmp2file($filename);
        return 1;
    };

    # dump the filter data
    $write_via_tmp->(
        $filefltinf,
        Dumper(['taskinf', $self->taskinf]),
        Dumper(['filters', $self->filters]),
        Dumper(['main_mirror', $main_mirror]),
    );
    $self->stagelog("=== end filefltinf $dmn ===");

    # save the task json
    $write_via_tmp->($filetaskjson, $task_json."\n");
    $self->stagelog("=== end filetaskjson $dmn ===");

    # phrase generation over the feed

    # the list of all feeds that have to be processed
    my $feeds = $self->get_feeddata
        or return;  # some error

    my $task = $self->taskinf;
    $task->{Resource}{UseAsName} =~ s/(^\s+|\s+$)//g if ($task->{Resource}{UseAsName});
    $task->{Resource}{UseAsBody} =~ s/(^\s+|\s+$)//g if ($task->{Resource}{UseAsBody});
    for my $fd (@$feeds) {
        $fd->{use_as_name} = $self->fill_use_as_field($fd, $task->{Resource}{UseAsName}) if ($task->{Resource}{UseAsName});
        $fd->{use_as_body} = $self->fill_use_as_field($fd, $task->{Resource}{UseAsBody}) if ($task->{Resource}{UseAsBody});
    }

    $self->stagelog("=== end feed pkd $dmn === ".$self->curtime." ===");

    $proj->bannerland_dbh->reconnect; # the connection could have gone stale

    my %eo_par = (
        check_required_fields => 0,
        %{$opts{export_offers_par} // {}},
    );
    if (defined $self->{timeout_hours}) {
        # remaining time budget: the total timeout minus the time already spent
        $eo_par{timeout} = $self->{timeout_hours} * 3600 - (time - $begin_time);
    }

    return $self->do_export_offers(feeds => $feeds, %eo_par) // {};
}

# Builds the list of phrases for a single offer.
# Input:
#   $pt  -  the offer
#   ctx =>  generation context, a hash modified in place, with fields
#       funnel => subj, ... on YT: the dyn_banners call context
#   ext_data => information about external sources
#
# Returns nothing (undef in scalar context) when something is wrong,
# or a hash with the results: {
#   phrases   => list of phrase hashes
#   filtered  => data for the filter log
# }
#
# The function is kept as simple as possible: it will not exist on YT, where the
# process_offer* building blocks are going to be assembled in a different way!
#
sub process_offer {
    my ($self, $pt, %ppar) = @_;

    my $proj = $self->proj;

    return unless $self->process_offer_init($pt, %ppar);

    my ($native) = $self->process_offer_generate_native($pt, %ppar);

    # speed-up available on bare metal only: dse is needed only when nothing was
    # generated natively; on YT this logic is applied in combine
    if (@$native) {
        my @phrase_sources;
        for my $ext (@{ $ppar{ext_data} }) {
            push @phrase_sources, $ext if $ext->{source}{add_type} eq 'add_phrases';
        }
        $ppar{ext_data} = \@phrase_sources;
    }

    my ($external) = $self->process_offer_generate_external($pt, %ppar);

    my $combined = $self->process_offer_combine($native, $external);
    return if !@$combined;

    $ppar{phrases}    = $combined;
    $ppar{minuswords} = $self->process_offer_generate_minuswords($pt, %ppar);

    return $self->process_offer_finalize($pt, %ppar);
}

# Tells whether the offer url looks like an order-completion / payment page.
#
# Offers whose offer_source is 'feed' are never treated as payment pages.
# For the others the fixed url (get_pt_fixed_url) is matched against two
# patterns. Returns 1 for a payment page and 0 otherwise.
sub is_payment_page {
    my ($self, $pt) = @_;

    return 0 if $pt->offer_source eq 'feed';

    my $pt_fixed_url = $self->get_pt_fixed_url($pt);

    # "/success", optionally followed by a known suffix and by a literal ".php"
    # (the dot is escaped so that it does not match an arbitrary character),
    # terminated by "?", "/" or the end of the url
    return 1 if $pt_fixed_url =~ /\/success(ful[-_]payment|[-_]payment|[-_]page|[-_]reg)?(\.php)?(\?|\/|$)/i;
    return 1 if $pt_fixed_url =~ /\/onlinePayment\//i;

    return 0;
}

# Simple pre-processing of an offer: domain / url / filter checks and filling
# of per-offer parameters. Funnel counters are incremented after every check
# that the offer passes.
# Returns a dummy hashref on success and nothing when the offer is skipped.
sub process_offer_init {
    my ($self, $pt, %ppar) = @_;

    my $proj   = $self->proj;
    my $fcount = $self->get_funnel_for_pt($pt, $ppar{ctx});

    $pt->nullify_fields_with_cdata();

    # the offer domain is checked only for tasks without a feed url
    return if !$self->feedurl && !$self->check_offer_domain($pt);

    # offers with domain problems are not processed
    return unless $pt->{orig_domain} && $pt->{orig_domain_id};
    return unless $pt->{main_mirror} && $pt->{main_mirror_id};
    $fcount->{offers_after_domain_filter}++;

    return unless $self->get_pt_fixed_url($pt);
    $fcount->{offers_with_url}++;

    my $specflts = $self->specfilters;   # special filters for site subsections

    # make sure the url is not in the forbidden list of the matching filters:
    # ordinary filters are kept as they are, special filters are kept only
    # when they do not list the normalized url of the offer
    {
        my (@plain_ids, @allowed_spec_ids);
        for my $fltid (split(',', $pt->{offerfilters})) {
            if (!defined($specflts->{$fltid})) {
                push @plain_ids, $fltid;
            }
            elsif (!$specflts->{$fltid}{$pt->norm_url}) {
                push @allowed_spec_ids, $fltid;
            }
        }
        $pt->{offerfilters} = join ',', @plain_ids, @allowed_spec_ids;
    }

    # NOTE(review): join always yields a defined string, so this guard cannot
    # fire as written -- confirm whether an empty filter list was meant
    return if !defined($pt->{offerfilters});
    $fcount->{offers_after_check_specfilters}++;

    # make sure this is not an order-completion page
    return if $self->is_payment_page($pt);
    $fcount->{offers_with_not_banned_url}++;

    # data for the geo filter
    my ($geo_field, $geo_method) = $ENV{MR_BROADMATCH}
        ? ('task_geo_list',    'geo_list')
        : ('task_geo_subtree', 'geo_subtree');
    $pt->{$geo_field} = $self->$geo_method;

    $pt->{minus_words} = $self->get_all_minuswords;

    return {gen_params => {}};  # dummy hash, for compatibility with lib/make_banners.py
}

# Extra named parameters for the dyn_banners call of an offer:
# a hashref with the list of methods to be appended last.
sub get_banners_method_par {
    my ($self, $pt) = @_;

    return { add_last_methods_arr => ['store_search_count_in_minf'] };
}

# Native generation for an offer: custom phrases taken from the feed (when
# allowed by the custom phrases settings) followed by our own generation.
# Returns two arrayrefs: the generated elements and the rpc data returned by
# dyn_banners (filled only in the MR_BROADMATCH mode).
sub process_offer_generate_native {
    my ($self, $pt, %ppar) = @_;

    my $ctx = $ppar{ctx};
    my (@generated, @rpc);

    $self->load_custom_phrases_settings;
    my $has_custom = ($self->{custom_phrases_allowed} and $pt->{custom_phrases}) ? 1 : 0;

    if ($has_custom) {
        # DYNSMART-491: custom phrases from the feed are appended
        my @custom_phrases = split ",", $pt->{custom_phrases};

        # keep no more than the configured number of custom phrases per offer
        if (scalar @custom_phrases > $self->{custom_phrases_max_count_per_offer}) {
            @custom_phrases = @custom_phrases[0 .. $self->{custom_phrases_max_count_per_offer} - 1];
        }

        # https://st.yandex-team.ru/DYNSMART-543
        push @generated, map {
            +{
                phrase              => $_,
                title               => '',
                title_source        => '',
                title_template      => '',
                title_template_type => '',
                template            => 'custom',
                letter              => 'q',
            }
        } @custom_phrases;
    }

    # our own generation is skipped only when the task is limited to custom
    # phrases and the offer really has them
    unless ($self->{only_custom_phrases} and $has_custom) {
        my $native = [];
        if ($ENV{MR_BROADMATCH}) {
            my $gen_ctx = ($ctx->{generate_phrases} //= {});
            my ($banners, $rpc_data) = $pt->dyn_banners(ctx => $gen_ctx, %{$self->get_banners_method_par($pt)});
            push @rpc, @$rpc_data if $rpc_data;
            push @$native, @$banners if $banners;
        }
        else {
            $native = $pt->dyn_banners_cached;
        }
        push @generated, @$native;
    }

    return (\@generated, \@rpc);
}

# External generation for an offer. For tasks with neither a feed url nor
# special urls the 'dse' sources are removed from ext_data before the parent
# implementation is called.
sub process_offer_generate_external {
    my ($self, $pt, %ppar) = @_;

    my $specurls = $self->taskinf->{Resource}{SpecUrls} // [];
    unless ($self->feedurl or @$specurls) {
        # the original note says dse, as in get_feeddata, is in tskv_gen already,
        # so presumably it is not needed here -- TODO confirm
        my @without_dse = grep { $_->{source}{product_type} ne 'dse' } @{$ppar{ext_data}};
        $ppar{ext_data} = \@without_dse;
    }

    return $self->SUPER::process_offer_generate_external($pt, %ppar);
}

# Post-processes external elements with the parent implementation and marks
# every resulting element with the add_type of its source.
sub postprocess_external {
    my ($self, $pt, $arr, $source_info, %ppar) = @_;

    my $processed = $self->SUPER::postprocess_external($pt, $arr, $source_info, %ppar);
    $_->{add_type} = $source_info->{add_type} for @$processed;

    return $processed;
}

# Kept as a stub: it is yet to be decided which minus-words are moved to YT.
# Always returns nothing.
sub process_offer_generate_minuswords {
    my ($self, $pt, %ppar) = @_;

    return;
}

# Merges native and external banner-phrases.
# Returns the merged list passed through uniq_array_ref_deep.
sub process_offer_combine {
    my ($self, $native_arr, $external_arr) = @_;

    # split the native generation into regular elements and the fallback
    # (reserve) generation
    my (@regular, @fallback);
    for my $el (@$native_arr) {
        if ($el->{title_template_type} ne 'fallback') {
            push @regular, $el;
        }
        else {
            push @fallback, $el;
        }
    }

    my %seen_native = map { $_->{phrase} => 1 } @regular;

    # external elements whose phrase is not among the regular native ones,
    # grouped by add_type
    my (@add_banners, @add_phrases);
    for my $el (@$external_arr) {
        next if $seen_native{ $el->{phrase} };
        if ($el->{add_type} eq 'add_banners') {
            push @add_banners, $el;
        }
        elsif ($el->{add_type} eq 'add_phrases') {
            push @add_phrases, $el;
        }
    }

    my @combined = @regular;
    my $ext_banners_used = 0;

    # without regular native generation external banners are taken; the
    # fallback generation is used only when there are no external banners
    if (!@regular) {
        if (@add_banners) {
            push @combined, @add_banners;
            $ext_banners_used = 1;
        }
        else {
            push @combined, @fallback;
        }
    }

    if (@combined) {
        push @combined, @add_phrases;

        if (!$ext_banners_used and @add_banners) {
            # external banners were left unused:
            # their phrases are taken, but without the titles
            for my $el (@add_banners) {
                my %untitled = %$el;
                delete @untitled{qw(title title_source title_template title_template_type)};
                push @combined, \%untitled;
            }
        }
    }

    return uniq_array_ref_deep(\@combined);
}

# Turns the generated elements of an offer into the final banner-phrase rows.
#
# Input:
#   $pt   - the offer
#   %ppar - named parameters; this sub reads
#       ctx     => generation context (passed to get_funnel_for_pt)
#       phrases => arrayref of generated elements (phrase, title, letter, ...)
#
# Elements are grouped by title (the order of first appearance is kept), every
# phrase goes through a chain of filters, and a result row is built from the
# fields common for the offer, the fields of the banner (title) and the fields
# of the phrase. Funnel counters are updated along the way.
#
# Returns a hashref { phrases => arrayref of rows, filtered => arrayref }.
# Nothing in this sub pushes to @filtered, so 'filtered' is always empty here.
sub process_offer_finalize {
    my $self = shift;
    my $pt = shift;
    my %ppar = @_;

    my $proj = $self->proj;
    my $ctx = $ppar{ctx};
    my $fcount = $self->get_funnel_for_pt($pt, $ctx);

    $self->load_custom_phrases_settings;

    # price info ([currency, price]) is filled only for a valid positive price
    # whose currency matches options->{currency_re}
    my $price_inf;
    my $valid_price = $pt->valid_price($pt->{price});
    if ($valid_price && $valid_price > 0) {
        if (!$pt->{currencyId}) {
            # no currency in the offer: take the one returned by get_region_currency
            my $currency_id = $self->get_region_currency;
            $pt->{currencyId} = $currency_id if $currency_id;
        }
        my $currency_re = $self->proj->options->{currency_re};
        if ($pt->{currencyId} && ($pt->{currencyId} =~ /^($currency_re)$/i)) {
            my $p = $valid_price;
            my $currency = uc $pt->{currencyId};
            $currency = "RUR" if $currency eq "RUB";  # RUB is reported as RUR
            $price_inf = [ $currency, $p ];
        }
    }
    # filter ids of the offer; the first ":<digits>" fragment of each id is removed
    my @phids = grep { defined($_) } split /,/, $pt->{offerfilters};
    s/\:\d+// for @phids;

    # offer text: the name, or the non-empty parts joined by a space
    my $pttext = $pt->name || join(' ', grep {$_} ( $pt->typePrefix, $pt->model, $pt->vendor, $pt->{categpath} ) );

    # banner body: built from use_as_body when present, otherwise the task body
    my $body;
    if ($pt->{use_as_body}) {
        $body = $self->proj->body_builder->get_body_from_use_as($pt);
    }
    $body ||= $self->taskinf->{Resource}{Body};

    # banner-phrase data common for the whole offer

    my $fixed_url = $self->get_pt_fixed_url($pt);
    my $checksum = get_checksum($fixed_url);

    my $Direct2BSData = $self->get_direct2bsdata;
    my $pttext_phr = $proj->phrase($pttext);

    # extend the banner categorization logic:
    my $ctgsids = join(',', $pttext_phr->get_minicategs_directids);
    # if the categorization failed, the offer category is simply used as the banner category.
    if ($pt->{minicategs}) {
        # category flags are always taken
        my @categs = split(m/\//, $pt->{minicategs});
        if ( !$ctgsids ) {
            $ctgsids = join (',',  map { $proj->categs_tree()->get_minicateg_directid($_) } @categs);
        }
    }

    my $main_mirror = $pt->{main_mirror};
    my $offer_id = $pt->{'id'} // $pt->{'OfferID'} // $pt->{'OfferId'};
    my $banned_simdistances = $self->get_banned_simdistances();

    # fields common for all banners of this offer
    my %common_banner = (
        product_md5     => $pt->clean_md5,
        OrderID         => $self->taskinf->{OrderID},
        ClientID        => $self->taskinf->{ClientID},
        ParentExportID  => $self->ParentExportID,
        GroupExportID   => $self->GroupExportID,
        TemplateID      => $self->TemplateID,
        OfferID         => $offer_id || undef,
        Url             => $fixed_url,
        CanonizedUrl    => $pt->{canonized_url} // $fixed_url,
        Body            => $body,
        Site            => safe_punycode_decode($pt->{orig_domain}),
        TargetDomain    => $main_mirror,
        TargetDomainID  => $pt->{main_mirror_id},
        SiteFilter      => $pt->{orig_domain},
        SiteFilterID    => $pt->{orig_domain_id},
        DynamicBannerID => $checksum,
        Categories      => $ctgsids,
        Direct2BSData   => $Direct2BSData,
        Lang            => 'ru',
        Currency        => ($price_inf ? $price_inf->[0] : ''),
        Price           => ($price_inf ? $price_inf->[1] : ''),
        BannerlandBeginTime => $self->taskinf->{BannerlandBeginTime},
        OrderTags           => $self->taskinf->{OrderTags} // '[]',
        OfferFilters        => $pt->{offerfilters},
    );

    $common_banner{BusinessId} = $self->datacamp_business_id;
    $common_banner{ShopId} = $self->datacamp_shop_id;
    $common_banner{OfferYabsId} = $pt->{offerYabsId};

    # data encoded as JSON into the Info field of every row
    my $jsondata = {
        adv_type => $pt->ad_type,
    };

    my $picture = $pt->get_fixed_picture;
    $jsondata->{images} = [$picture] if $picture;
    if ($price_inf){
        $jsondata->{currency} = $price_inf->[0];
        $jsondata->{price}    = $price_inf->[1];
        my $oldprice = $pt->get_valid_oldprice;
        $jsondata->{oldprice} = $oldprice if $oldprice;
    }

    my (@seen_titles, %title_to_phrases, %title_to_sources); # keep the order of titles, group by title
    for my $el (@{$ppar{phrases}}) {
        my $title_source = $el->{title_source} || '';
        my $title = $el->{title} || '';
        # a title whose source does not pass check_banned_title_source is dropped,
        # the element is then grouped under the empty title
        if (!$self->check_banned_title_source($title_source, $main_mirror)) {
            $title = '';
            $title_source = '';
        }
        push @seen_titles, $title if (!$title_to_phrases{$title});
        push @{$title_to_phrases{$title}}, $el;
        # the first non-empty source seen for a title is remembered
        if ($title_source && (! $title_to_sources{$title})) {
            $title_to_sources{$title} = $title_source;
        }
    }

    my %rand_title_info;  # ext sources have no titles, so a pseudo-random one among the good ones is taken (relies on ext elements coming last!)
    my (@phrases, @filtered);
    for my $title (@seen_titles) {

        my $title_source = $title_to_sources{$title} || '';
        my $els = $title_to_phrases{$title};
        next if !$els || !@$els;  # there may be no phrases with an empty title

        my $title_template = $els->[0]->{title_template};
        my $title_template_type =  $els->[0]->{title_template_type};
        if (!$title) {
            # no title (external?) - take it from the title info remembered so far
            next if !%rand_title_info;
            $title = $rand_title_info{text};
            $title_source = $rand_title_info{source};
            $title_template = $rand_title_info{template};
            $title_template_type = $rand_title_info{template_type};
        }
        $fcount->{banners_with_title}++;

        # set the proper letter
        $_->{letter} = $_->{letter} || $pt->source_letter || default_source_letter() for @$els;

        next unless $proj->is_bs_compatible($title);
        $fcount->{banners_with_bs_compatible_title}++;

        my $green_url = $self->get_green_url($pt, $els->[0]);

        my $bl_banner_details = {
            title_template => $title_template,
            title_template_type => $title_template_type,
            title_source   => $title_source,
            offer_source => $pt->offer_source,
        };

        # fields related to banner
        my %banner = (
            %common_banner,
            Title           => $title,
            UrlText         => $green_url,

            # BL internal YSON field (json to yson in convert_yt_to_py)
            BLBannerDetails => $proj->json_obj->encode($bl_banner_details),
        );

        my @banner_phrases;
        my @fallback_banner_phrases;

        # incremented unconditionally: no throttling is applied in this sub
        $fcount->{banners_after_throttle}++;

        # per-banner phrase counters, folded into the funnel after the loop
        my $phr_count = {};
        my $sorted_els = $self->sort_phrases_by_priority_order($els);
        for my $el (@$sorted_els) {
            my $phtext = $el->{phrase};
            my $letter = $el->{letter};
            my $feed_source = $self->source_letter_2_name($letter);
            my $is_custom_phrase = ($el->{template} eq 'custom');

            my $bl_phrase_details = {
                bl_phrase_template      => ($el->{template} // ''),
                bl_phrase_template_type => $letter,
            };

            # with OnlyClientPhrases set only custom phrases are kept
            next if !$is_custom_phrase && $self->{OnlyClientPhrases};

            $phr_count->{phrases_from_source}++;

            next if $phtext =~ /(?<!\s)-/;  # stop-gap: phrases with hyphens are dropped, to be fixed in the parser
            $phr_count->{phrases_without_hyphen}++;

            if ( $proj->phrase($phtext)->is_porno_phrase ) {
                $proj->log("'$phtext' ($feed_source) filtered by porno filter");
                next;
            }
            $phr_count->{phrases_after_porno_filter}++;

            if (!$self->is_good_phrase_text($phtext, $feed_source)) {
                next;
            }
            $phr_count->{phrases_good_text}++;

            my $match_type = $el->{match_type} // $pt->match_type;
            $match_type = ($match_type eq 'norm') ? 'norm' : 'snorm';  # just in case

            # remember the title info for elements without a title: replaced when
            # nothing is stored yet or when md5int of the stored text is smaller
            # than md5int of the current title
            if ( !%rand_title_info || (md5int($rand_title_info{text}) < md5int($title)) ) {
                $rand_title_info{text} = $title;
                $rand_title_info{source} = $title_source;
                $rand_title_info{template} = $el->{title_template};
                $rand_title_info{template_type} = $el->{title_template_type};
            }

            my $SimDistance = $self->get_sim_distance_by_letter($letter);
            next if $banned_simdistances->{$SimDistance}; # ban by sim distance for particular clients
            $phr_count->{phrases_sd_not_banned}++;

            # the first filter id in string sort order is used as PhraseID
            next unless scalar(@phids);
            my ($phid) = sort @phids;

            $phr_count->{phrases_after_all_filters}++;
            my $h = {
                %banner,
                Info               => $proj->json_obj->encode($jsondata),
                BLPhraseDetails    => $proj->json_obj->encode($bl_phrase_details),
                Type               => $match_type,
                Text               => $phtext,
                PhraseID           => $phid,
                CTR                => 0,
                PCTR               => 0,
                SimDistance        => $SimDistance,
                SpecPlaceFlag      => 1,
                OnlyYandexFlag     => 0,
                APCRatio           => 1000000,  # see PerfTask

                # for dups filter on YT:
                bl_phrase_template_type => $letter,
                Score              => 100,
            };

            $h->{search_count} = $el->{minf}{search_count} || 0;
            $self->check_banner_phrase_fields($h);
            $self->cast_result_values($h);
            # phrases made from the title are kept apart as a fallback
            if ( $el->{template} ne 'phrase_from_title' ) {
                push @banner_phrases, $h;
            } else {
                push @fallback_banner_phrases, $h;
            }
        }
        # regular phrases win; otherwise only the fallback phrase with the
        # largest search_count is taken
        if (scalar(@banner_phrases)) {
            push @phrases, @banner_phrases;
        } elsif (scalar(@fallback_banner_phrases)) {
            my ($top_fallback_phrase) = sort {$b->{search_count} <=> $a->{search_count}} @fallback_banner_phrases;
            push @phrases, $top_fallback_phrase;
        }
        # every phrase counter reached for this banner adds one to the funnel
        $fcount->{'banners_with_'. $_}++ for keys %$phr_count;
    }

    my $deduplicated_phrases = $self->filter_phrase_duplicates(\@phrases);

    my %result = (phrases => $deduplicated_phrases, filtered => \@filtered);

    # optional limit on the number of phrases, applied through get_top_phrases
    my $max_phrases_per_banner = $self->get_max_phrases_per_banner;
    if (defined $max_phrases_per_banner) {
        $result{phrases} = $self->get_top_phrases(
            $result{phrases},
            limit => $max_phrases_per_banner,
            sort_by => 'search_count',
            group_by => 'BannerID',
        );
    }

    return \%result;
}

# Stores the export parameters of a generation result, when the result has
# export_info. Returns nothing.
sub worker_finalize {
    my ($self, $result) = @_;

    my $export_info = $result->{export_info};
    $self->set_params($self->get_export_offers_resparams($export_info)) if $export_info;

    return;
}

# Normalized-words hash (normwordshash) of the ad group minus-words of the task.
sub get_ad_group_minuswords :CACHE {
    my $self = shift;

    # everything inside parentheses is removed so that minus-phrases are not
    # taken into account (DYNSMART-465)
    (my $minuswords_text = $self->taskinf->{Resource}{AdGroupMinusWords} || "") =~ s/\(.*?\)//g;

    return $self->proj->phrase($minuswords_text)->normwordshash;
}

# Normalized-words hash (normwordshash) of the campaign minus-words of the
# task; campaign minus-words are taken into account as well (DYNSMART-465).
sub get_campaign_minuswords :CACHE {
    my $self = shift;

    # everything inside parentheses is removed so that minus-phrases are not
    # taken into account (DYNSMART-465)
    (my $minuswords_text = $self->taskinf->{Resource}{CampaignMinusWords} || "") =~ s/\(.*?\)//g;

    return $self->proj->phrase($minuswords_text)->normwordshash;
}

# Union of the ad group and campaign minus-words hashes, as a new hashref.
# For keys present in both, the campaign value wins.
sub get_all_minuswords :CACHE {
    my $self = shift;

    my $of_ad_group = $self->get_ad_group_minuswords;
    my $of_campaign = $self->get_campaign_minuswords;

    return { %$of_ad_group, %$of_campaign };
}

# Checks a phrase text against the task minus-words and the bad-phrases data.
#
#   $text        - phrase text
#   $source_name - source name, used in log messages only
#   $stat        - optional hashref; the counter of the filter that rejected
#                  the phrase is incremented in it
#
# Returns 1 for a good phrase and 0 for a filtered one.
sub is_good_phrase_text {
    my ($self, $text, $source_name, $stat) = @_;

    my $proj = $self->proj;

    my $phr = $proj->phrase($text);        # phrase of the text as passed in
    $text =~ s/~0$//;
    my $fixed_phr = $proj->phrase($text);  # phrase of the text without the trailing "~0"

    # a phrase containing any minus-word (by normalized words) is rejected
    my $minuswords = $self->get_all_minuswords;
    if (%$minuswords) {
        for my $word ($fixed_phr->normwords) {
            next unless $minuswords->{$word};

            $stat->{filtered_by_all_minuswords}++ if $stat;
            $proj->log("'$phr' ($source_name) filtered by minus words");
            return 0;
        }
    }

    # DYNAMICADS-209
    # phrases with bad statistics are skipped
    my $is_bad = $proj->bad_dyn_phrases->check_order_phrase($self->taskinf->{OrderID}, $phr);
    if ($is_bad) {
        $stat->{filtered_by_ctr}++ if $stat;
        $proj->log("'$phr' ($source_name) filtered by ctr");
        return 0;
    }

    return 1;
}

# Returns a reference to an array of sources; each of them has the fields
# percent, filename, letter. The array is a copy of the list in the options.
sub get_external_sources {
    my $self = shift;

    return [ @{$self->proj->options->{DynSources}{inclusion_params}} ];
}

# Sim distance configured in the options (DynSources / sim_distance) for the
# given source letter.
sub get_sim_distance_by_letter {
    my $self   = shift;
    my $letter = shift;

    my $options = $self->proj->options;
    return $options->{DynSources}{sim_distance}{$letter};
}

# Dumps the offers of a feed into the "feedtskv" file of the task: the feed
# writes a "_tmp" file, which is then published through _tmp2file.
sub _save_feedtskv_files {
    my $self = shift;
    my $feed = shift;

    my $target = $self->filenames->{"feedtskv"};
    my $tmp_name = $target . "_tmp";

    $feed->offers_tskv_mpd_file(filename => $tmp_name);
    $self->_tmp2file($target);
    $self->stagelog("=== end filefeedtskv ===");
}

# Lock name for the feed url of the task: "dyn_" followed by feedurl_md5.
sub task_feedurl_lock {
    my ($self) = @_;

    my $md5 = $self->feedurl_md5;
    return "dyn_$md5";
}

# Default YT table with the input tasks, read from the options; hosts with
# the 'bannerland-preprod' role use a separate option.
sub get_default_input_tasks_yt_table: CACHE {
    my ($self) = @_;

    my $option_name = get_host_role() eq 'bannerland-preprod'
        ? "dyn_tasks_source_table_preprod"
        : "dyn_tasks_source_table";

    return $self->proj->options->{$option_name};
}

# Type of the task handled by this class.
sub get_task_type { return 'dyn' }

# Priority order configured in the options (DynSources / priority_order).
sub get_priority_order {
    my ($self) = @_;

    my $options = $self->proj->options;
    return $options->{DynSources}{priority_order};
}

# Names of the result columns marked as optional in the 'dyn_result_columns'
# option, as a hashref { name => 1 }.
sub get_optional_fields {
    my ($self) = @_;

    my $columns = $self->proj->options->{'dyn_result_columns'};

    my %is_optional;
    for my $column (@$columns) {
        $is_optional{ $column->{'name'} } = 1 if $column->{'optional'};
    }

    return \%is_optional;
}

# Result columns as configured in the 'dyn_result_columns' option.
sub get_result_fields {
    my ($self) = @_;

    my $options = $self->proj->options;
    return $options->{'dyn_result_columns'};
}

1;
