package BM::BannersMaker::Tasks::Task;

use utf8;
use open ':utf8';

use std;
use base qw(ObjLib::ProjPart);

use Data::Dumper;
use Storable qw(dclone);
use List::Util qw( max sum);
use Utils::Urls qw (url_sort_get_params fix_url_scheme);

use Encode;
use Encode qw{_utf8_on _utf8_off};

use URI::Escape;
use LWP::UserAgent;
use IPC::Open2;
use JSON qw(from_json to_json);
use File::Copy;

use Scalar::Util qw(weaken openhandle);

use Digest::MD5 qw(md5_hex);
use XML::Parser;

use XMLParser;
use Utils::Sys qw(
    md5int get_files_md5
    wait_for_file_lock release_file_lock get_file_lock
    mtime
    load_json save_json
    url_decode_safe
);
use Utils::Common;
use Utils::Hosts qw(get_curr_host);
use Utils::Urls;
use Utils::CompileTime qw();
use Utils::Funcs qw(encode_tsv parse_tskv_line get_data_md5 patch_hash);
use Utils::Regions;
use Utils::Array qw(in_array);

use BM::BannersMaker::Tasks::TaskUtils;

########################################################
#Доступ к полям
########################################################

# Declare the object fields that get accessor methods.
# NOTE(review): mk_accessors is presumably inherited from ObjLib::ProjPart — confirm.
__PACKAGE__->mk_accessors(qw(
    taskinf
));
#    site_proxy_ref


########################################################
# Интерфейс
########################################################

########################################################
# Инициализация
########################################################

# Package-level variable, initialised to the empty string.
# NOTE(review): no use of $badre is visible in this part of the file.
our $badre = '';
# Revision value obtained once, at module load time.
my $svn_revision = Utils::CompileTime::get_revision();

# Initialisation hook. It only unpacks $self; the call to the parent
# class's init is commented out.
sub init {
    my ($self) = @_;
#    $self->SUPER::init();
}

########################################################
# Методы
########################################################

# Current time as reported by the project object.
sub curtime {
    my ($self) = @_;
    return $self->proj->curtime;
}

# Start-time string used as a prefix for the files produced by the task.
# Delegates the formatting of the current time to TaskUtils::filebegtime.
sub filebegtime :CACHE {
    my $self = shift;
    return BM::BannersMaker::Tasks::TaskUtils::filebegtime($self->curtime);
}

# Size of a product-list pack.
# Per the original note: a large pack size helps to even out the batches
# of requests sent to "zora".
sub get_ptl_pack_size {
    my $pack_size = 3000;
    return $pack_size;
}

# Returns a deep copy of taskinf with the insignificant parameters removed
# (the ones common to dyn and perf). The stored taskinf is not modified.
sub clean_taskinf {
    my ($self) = @_;

    # TODO: remove bannerland_queue_id once all queues are built without it!

    # Insignificant task parameters, https://st.yandex-team.ru/DYNSMART-704
    # TODO: remove ManagerEmail and HasSmartTGOHostOption after the switch to the new export
    my @insignificant_keys = qw(
        EngineID ManagerEmail Login Password CounterID GenerationID HasSmartTGOHostOption BSTaskID
        bannerland_queue_id BannerlandBeginTime
        GroupExportIDs
    );

    my $cleaned = dclone($self->taskinf);
    delete @{$cleaned}{@insignificant_keys};
    return $cleaned;
}

# Some taskinf values are stored as a list holding a single element;
# this returns that single element.
# Parameters:
#   $key - name of the taskinf field
# Dies (naming the task and the field) when the field is not an array
# reference or does not hold exactly one element.
sub get_value_from_list_field {
    my ($self, $key) = @_;

    my $list = $self->taskinf->{$key};
    if (ref($list) ne 'ARRAY') {
        # Previously this surfaced as an opaque dereference error.
        my $task_id = $self->task_id;
        die "task $task_id has no list in field $key";
    }
    if (@$list != 1) {
        my $task_id = $self->task_id;
        die "task $task_id has not exactly one id in field $key: @$list";
    }
    return $list->[0];
}

# MD5 of the cleaned taskinf (see clean_taskinf).
# When datacamp_site is true, a 'salt' key is added before hashing.
sub clean_md5 :CACHE {
    my ($self) = @_;
    my $inf = $self->clean_taskinf;
    if ($self->datacamp_site) {
        $inf->{salt} = 1;
    }
    return get_data_md5($inf);
}

# MD5 of a banner-phrase record.
# The record is deep-copied and its Info field is decoded with the
# project's JSON object before hashing; the caller's record is untouched.
sub get_bannerphrase_md5 {
    my ($self, $bp) = @_;

    my $bp_copy = dclone($bp);
    my $json = $self->proj->json_obj;
    $bp_copy->{Info} = $json->decode($bp_copy->{Info});

    return get_data_md5($bp_copy);
}

# Returns the product's URL after fixing the scheme, applying the task-level
# fix, validating it and converting it to punycode. For the my-shop.ru
# domain the GET parameters are additionally sorted.
# Returns nothing when the product has no URL or the URL fails validation.
sub get_pt_fixed_url {
    my ($self, $pt) = @_;

    my $raw_url = $pt->url;
    return unless $raw_url;

    my $fixed = $self->fix_url_by_task(fix_url_scheme($raw_url));
    return unless $self->proj->validate_url($fixed);

    my $result = Utils::Urls::url_to_punycode($fixed);
    if ($self->proj->page($result)->domain eq 'my-shop.ru') {
        $result = url_sort_get_params($result);
    }

    return $result;
}

# Expands Direct macros in a URL.
# The substitutions come from taskinf->{Resource}{SubstituteURLParams};
# both the plain form ({name}) and the URI-escaped form (%7Bname%7D) of a
# macro are replaced. A macro with no (or a false) substitution is kept.
# Returns the URL unchanged when there is no substitution table.
sub transf_url_macros {
    my ($self, $url) = @_;

    my $subst = $self->taskinf->{Resource}{SubstituteURLParams};
    return $url unless $subst;

    $url =~ s/(\{[-_A-Za-z0-9]+\})/ $subst->{$1} || $1 /eg;

    # The same table, keyed by the URI-escaped macro names.
    my %escaped_subst = map { (URI::Escape::uri_escape($_) => $subst->{$_}) } keys %$subst;
    $url =~ s/(%7B[-_A-Za-z0-9]+%7D)/ $escaped_subst{$1} || $1 /eg;

    return $url;
}

# Name pattern of the files that keep intermediate data:
# <task dir>/<filebegtime>_<type>
sub fileformat {
    my ($self, $type) = @_;
    my $prefix = $self->dir . "/" . $self->filebegtime;
    return $prefix . "_" . $type;
}

# Whether the task writes to its own log files.
# Returns the object's use_task_logs field, or 1 when it is undefined.
sub use_task_logs {
    my $self = shift;
    my $flag = $self->{use_task_logs};
    return defined($flag) ? $flag : 1;
}

# Runs before the task's worker: logs the task description and, when
# use_task_logs is true, redirects output to per-task log files
# <dir>/<filebegtime>_task.log and <dir>/<filebegtime>_task.err.
# The previous STDOUT/STDERR globs are remembered in $self->{_prev_stdout}
# and $self->{_prev_stderr} so that action_after_task can restore them.
sub action_before_task {
    my ($self) = @_;

    $self->proj->log("begin task:");
    $self->proj->log_dump($self->taskinf);

    if($self->use_task_logs){
        my $fileerr = $self->dir."/".$self->filebegtime."_task.err";
        my $filelog = $self->dir."/".$self->filebegtime."_task.log";

        # Create the task directory if it does not exist yet.
        $self->do_sys_cmd("mkdir -p " . $self->dir) unless -d $self->dir;

        # Announce the new destinations on the current streams.
        print STDOUT "New STDOUT $filelog\n";
        print STDERR "New STDERR $fileerr\n";

        {
            # NOTE(review): the dup'ed handles opened here are immediately
            # replaced by the IO slots of STDOUT/STDERR below, and the result
            # of open is not checked — confirm this is intended.
            open(OLDOUT, ">&STDOUT");
            open(OLDERR, ">&STDERR");
            *OLDOUT = *STDOUT{IO};
            *OLDERR = *STDERR{IO};
            $self->{_prev_stdout} = *OLDOUT;
            $self->{_prev_stderr} = *OLDERR;
            #$self->{_prev_stdout} = *STDOUT;
            #$self->{_prev_stdout} = *STDOUT;
        }


        # Redirect the logs.
        # NOTE(review): $log_dir is assigned but not used in this sub.
        my $log_dir = $Utils::Common::options->{dirs}{'log'};
        open NSTDOUT, ">> $filelog" or die "Can't redirect STDOUT: $!";
        open NSTDERR, ">> $fileerr" or die "Can't redirect STDERR: $!";
        #*STDOUT = *NSTDOUT{IO};
        *STDERR = *NSTDERR{IO};
        # If STDOUT is redirected by anything other than select, reading via
        # IPC::Open2 breaks for some reason, which is critical for BM::LemmerTest.
        select(NSTDOUT);
        # NOTE(review): autoflush is set on the STDOUT glob, while the selected
        # default output handle is NSTDOUT — confirm.
        STDOUT->autoflush(1);
        STDERR->autoflush(1);
    }
}

# Runs after the task's worker: when use_task_logs is true, closes the
# STDOUT and STDERR handles and switches output back to the handles saved
# by action_before_task; then logs the end of the task.
sub action_after_task {
    my ($self) = @_;

    if($self->use_task_logs){
        # NOTE(review): action_before_task redirects STDOUT via select(NSTDOUT),
        # so close(STDOUT) acts on the STDOUT glob rather than on NSTDOUT —
        # confirm that this is the handle meant to be closed.
        close(STDOUT);
        close(STDERR);

        #*STDOUT = *{$self->{_prev_stdout}}{IO};
        # Make the saved handle the default output handle again and restore STDERR.
        select(*{$self->{_prev_stdout}}{IO});
        *STDERR = *{$self->{_prev_stderr}}{IO};
    }

    $self->proj->log("end task");
}

# Runs the whole task: option check, pre-task actions, the worker and the
# post-task actions, in that order. Returns what action_after_task returns.
# An exception from any step propagates and skips the remaining steps.
sub make_task {
    my $self = shift;
    for my $step (qw(check_deprecated_options action_before_task worker)) {
        $self->$step;
    }
    return $self->action_after_task;
}

# Writes a line "<curtime> [<task_log_id>]  <text>" to the external log:
# the saved pre-task STDOUT if it is an open handle, otherwise the project's
# global_stderr handle, otherwise the current STDERR.
sub extlog {
    my ($self, $text) = @_;

    my $out = openhandle($self->{_prev_stdout});
    $out = openhandle($self->proj->{global_stderr}) if !defined($out);

    my $line = $self->curtime." [".$self->task_log_id."]"."  $text\n";
    if (!defined($out)) {
        print STDERR $line;
        return;
    }
    return print {$out} $line;
}

# Writes a line "<curtime> [<task_log_id>]  <text>" to the common error log:
# the project's global_stderr handle if it is open, otherwise the saved
# pre-task STDOUT. Does nothing when neither handle is open.
sub globallog {
    my ($self, $text) = @_;

    my $out = openhandle($self->proj->{global_stderr});
    $out = openhandle($self->{_prev_stdout}) if !defined($out);
    return if !defined($out);

    return print {$out} $self->curtime." [".$self->task_log_id."]"."  $text\n";
}

# Resets the accumulated stage log to the empty string.
sub clear_stagelog {
    my $self = shift;
    return $self->{_stagelog} = '';
}

# Returns the stage log text accumulated so far.
sub get_stagelog {
    my $self = shift;
    return $self->{_stagelog};
}

# Records a stage message: writes it to the current log and to the external
# log, appends it to the accumulated stage log and, unless the
# BM_NO_STAGELOG environment variable is set, stores the accumulated log
# through set_params. Errors raised while storing are swallowed.
sub stagelog {
    my ($self, $text) = @_;

    $self->proj->log($text); # current log
    $self->extlog($text);    # external log
    $self->{_stagelog} .= "$text\n";

    return if $ENV{BM_NO_STAGELOG};  # spike technology

    return eval {
        my %fields = (stagelog => $self->get_stagelog);
        $self->set_params(\%fields);
    };
}

# Domain of the task. This implementation always returns the
# placeholder 'nodomain'.
sub domain :CACHE {
    my $self = shift;
    return 'nodomain';
}

# Domain in the form used inside file paths
# (converted by TaskUtils::domain_to_dir).
sub filedmn :CACHE {
    my $self = shift;
    return BM::BannersMaker::Tasks::TaskUtils::domain_to_dir($self->domain);
}

# Helper: promotes "<filename>_tmp" to "<filename>".
# If "<filename>" already exists it is first renamed to "<filename>_prev".
# A failed move only produces a warning.
sub _tmp2file {
    my ($self, $filename) = @_;
    my ($fresh, $backup) = ("${filename}_tmp", "${filename}_prev");

    if (-e $filename) {
        move($filename, $backup) or warn("can't move: $!");
    }
    return( move($fresh, $filename) || warn("can't move: $!") );
}

# Writes a file through a temporary copy.
# Parameters:
#   $filename - target file name
#   $code     - callback invoked as $code->($self, $fh), where $fh is a
#               write handle on "<filename>_tmp"
# After the callback returns, the temporary file replaces the target and the
# previous version of the target is kept (see _tmp2file).
# Dies when the temporary file cannot be opened or closed, so that a failed
# write is never promoted to the target name.
sub data2file {
    my ($self, $filename, $code) = @_;

    my $tmp_name = $filename . "_tmp";
    open(my $fht, '>', $tmp_name) or die "Can't open $tmp_name for writing: $!";
    $code->($self, $fht);

    # The callback may have closed the handle itself; only close (and check
    # for buffered write errors) when it is still open.
    if (openhandle($fht)) {
        close($fht) or die "Can't close $tmp_name: $!";
    }

    return $self->_tmp2file($filename);
}

# Base directory for the task's files.
# Reads work_dir and temp_dir from the project options section named by
# $self->{banners_dirs_param} (default: 'dyn_banners_dirs'), creates both
# directories if they are missing and returns temp_dir.
sub basedir :CACHE {
    my ($self) = @_;
    my $proj = $self->proj;

    # Working directories
    my $dirs_param = $self->{banners_dirs_param} || 'dyn_banners_dirs';
    my $work_dir = $proj->options->{$dirs_param}->{work_dir};
    my $temp_dir = $proj->options->{$dirs_param}->{temp_dir};

    # Make sure both directories exist
    for my $path ($work_dir, $temp_dir) {
        mkdir $path unless -d $path;
    }

    return $temp_dir;
}

# Identifier of the task used in log lines: the OrderID, followed by the
# sorted BannerIDs joined with '_' when the task has any.
# (OrderID alone can coincide for different tasks, hence the banner ids.)
sub task_log_id :CACHE {
    my ($self) = @_;
    my $inf = $self->taskinf;

    my $log_id = $inf->{OrderID};
    #my $key = $task->{GroupExportIDs};
    #$key = $task->{BannerIDs} if !$key || ref($key) eq "ARRAY" && !@$key;
    my $banner_ids = $inf->{BannerIDs};
    if (ref($banner_ids) eq 'ARRAY' && @$banner_ids) {
        $log_id .= join('', map { "_$_" } sort @$banner_ids);
    }
    return $log_id;
}

# Directory of the task's domain: <basedir>/<filedmn>.
# The directory is created if it does not exist.
sub domain_dir :CACHE {
    my $self = shift;
    my $path = $self->basedir . '/' . $self->filedmn;
    mkdir $path unless -d $path;
    return $path;
}

# Directory in which the task's files are created.
# Uses $self->{extdir} when it is set, otherwise <domain_dir>/<task_id>.
# The directory is created if it does not exist.
sub dir :CACHE {
    my $self = shift;

    my $target = $self->{extdir};
    if (!$target) {
        $target = $self->domain_dir.'/'. $self->task_id;
    }
    mkdir $target unless -d $target; # make sure it exists

    return $target;
}

# Path of the "feeddata" directory, a sibling of the task directory.
sub dir_feeddata {
    my $self = shift;
    my $task_dir = $self->dir;
    return $task_dir . "/../feeddata";
}

# Returns the time of the task's previous run, taken from the 'end'
# parameter stored for the task and converted with
# proj->dates->trdate('db_time', 'sec', ...).
# Returns 0 when 'end' is empty or equals '0000-00-00 00:00:00'.
# The outcome is logged together with the task's sorted BannerIDs.
sub get_last_exec_time {
    my ($self) = @_;

    my $prev_end = $self->get_param('end');
    my $has_run = $prev_end && $prev_end ne '0000-00-00 00:00:00';

    my $banner_ids = join(' ', sort @{$self->taskinf->{BannerIDs} || []});
    if (!$has_run) {
        $self->proj->log($banner_ids. " have not started");
        return 0;
    }

    $self->proj->log($banner_ids. " " .$prev_end);
    return $self->proj->dates->trdate('db_time', 'sec', $prev_end);
}

# Appends a query-string parameter to a URL.
# Parameters:
#   $url   - the URL; may contain a query string and/or a fragment
#   $param - ready-made "name=value" text; a false value leaves the URL as is
# The parameter is joined with '&' when the URL already has a '?', and with
# '?' otherwise. It is always inserted before the fragment, including an
# empty fragment (a trailing '#').
sub add_param_to_url {
    my ($self, $url, $param) = @_;
    return $url unless $param;

    # Cut the fragment off so that the parameter does not land inside it.
    my $fragment = '';
    if ($url =~ s/(\#.*)$//) {
        $fragment = $1;
    }

    my $separator = ($url =~ /\?/) ? '&' : '?';
    return $url . $separator . $param . $fragment;
}

# Shortcut for process_merge_keys with write_merge_key => 1 appended to the
# caller's arguments.
sub add_merge_keys {
    my ($self, @args) = @_;
    return $self->process_merge_keys(@args, write_merge_key => 1);
}

# For every offer computes the {merge_key} (a normalised URL, see
# prepare_url_key), sorts the offers by that key and removes duplicates.
# Parameters:
#   $filename_src, $filename_dst - source and destination file names
#   dont_keep_prev => 0|1 (default: 0)  -- when true the destination is
#       replaced directly; otherwise the previous version is kept (_tmp2file)
#   write_merge_key => 0|1  -- whether merge_key is written to the feed
#       (currently forced to 1, see below)
# Rows without a "url=" field are dropped.
# Dies when any of the intermediate files cannot be opened, closed or
# renamed, so that a failed step never replaces the destination with an
# empty or truncated file.
sub process_merge_keys {
    my $self = shift;
    my $filename_src = shift;
    my $filename_dst = shift;
    my %par = @_;
    $self->log("process_merge_keys for $filename_src ...");

    # This is temporary fix to avoid re-export; TODO: delete when bsdb is OK
    $par{write_merge_key} = 1;

    my $key_file   = "${filename_dst}_key";
    my $tmp_file   = "${filename_dst}_tmp";
    my $nodup_file = "${filename_dst}_nodup_tmp";

    # Pass 1: prefix every row with its url and merge key.
    open(my $fin, '<', $filename_src) or die "Can't open $filename_src for reading: $!";
    open(my $fout, '>', $key_file) or die "Can't open $key_file for writing: $!";
    while (defined(my $row = <$fin>)) {
        if ($row =~ /(?:\t|^)url=(.+?)(?:\t|$)/) {
            my $url = $1;
            my $url_key = $self->prepare_url_key($url);
            $url_key =~ s/\t/%09/g;  # the key must not break the tab-separated layout
            print $fout join("\t", $url, $url_key, $row);
        }
    }
    close($fin);
    close($fout) or die "Can't close $key_file: $!";
    $self->do_sys_cmd("LC_ALL=C sort -t\$'\\t' -k2,2 -k1,1 ${filename_dst}_key > ${filename_dst}_tmp");
    unlink $key_file;

    # Pass 2: among the rows that share a merge_key and differ in nothing but
    # the url, keep only one — the row with the smallest url.
    # Relies on the input being sorted by merge_key (and then by url).
    my %seen = ();
    my $curr_merge_key = '';
    open(my $nodup_fin, '<', $tmp_file) or die "Can't open $tmp_file for reading: $!";
    open(my $nodup_fout, '>', $nodup_file) or die "Can't open $nodup_file for writing: $!";
    while (defined(my $line = <$nodup_fin>)) {
        chomp $line;
        my (undef, $merge_key, $row) = split /\t/, $line, 3;
        # NOTE(review): a field without '=' yields a single element here and
        # shifts the following key/value pairs — confirm such rows cannot occur.
        my %kv = map { split /=/, $_, 2 } split /\t/, $row;
        if ( $merge_key ne $curr_merge_key ) {
            $curr_merge_key = $merge_key;
            %seen = ();
        }
        delete $kv{url};
        delete $kv{offer_line_md5};
        my $kv_md5 = md5int( join("\t", map {$_ .'='. $kv{$_}} sort keys %kv ) );
        next if $seen{$kv_md5};
        $seen{$kv_md5} = 1;
        $row = "merge_key=$merge_key\t$row" if $par{write_merge_key};
        print $nodup_fout "$row\n";
    }
    close($nodup_fin);
    close($nodup_fout) or die "Can't close $nodup_file: $!";
    unlink $tmp_file;

    rename("${filename_dst}_nodup_tmp", "${filename_dst}_tmp")
        or die "Can't rename ${filename_dst}_nodup_tmp to ${filename_dst}_tmp: $!";

    if ($par{dont_keep_prev}) {
        rename("${filename_dst}_tmp", $filename_dst)
            or die "Can't rename to $filename_dst: $!";
    } else {
        $self->_tmp2file($filename_dst);
    }
    $self->log("process_merge_keys done, created: $filename_dst");
}


########################################################
# Логирование в базе
########################################################

# Name of the task log table.
# Must be overridden by a subclass; this base implementation always dies.
sub tablename {
    my $self = shift;
    die("Unknown db table for taks logs.");
}

#sub dbtable :CACHE {
# Table object for the task log table (keyed by TaskID, on the
# 'bannerland_dbh' handle). Not cached, because caching would keep the
# connection alive.
sub dbtable {
    my $self = shift;
    my $proj = $self->proj;
    return $proj->dbtable($self->tablename, 'TaskID', 'bannerland_dbh');
}

# Name of the per-domain log table.
# Must be overridden by a subclass; this base implementation always dies.
sub domain_tablename {
    my $self = shift;
    die("Unknown db table for taks logs.");
}

#sub domain_dbtable :CACHE {
# Table object for the per-domain table (keyed by Domain, on the
# 'bannerland_dbh' handle). Not cached, because caching would keep the
# connection alive.
sub domain_dbtable {
    my $self = shift;
    my $proj = $self->proj;
    return $proj->dbtable($self->domain_tablename, 'Domain', 'bannerland_dbh');
}

# Unique identifier of the task, built from:
#   - the OrderID;
#   - the sorted BannerIDs, if any (when there are more than three, only the
#     first three are kept, followed by md5int of the whole sorted list);
#   - chunk_count and chunk_remainder, when chunk_count is set.
# The parts are joined with '_'. With no banner ids and no chunking the
# OrderID is returned as is.
sub task_id :CACHE {
    my ($self) = @_;
    my $inf = $self->taskinf;

    my @suffix;

    # BannerIDs
    my $banner_ids = $inf->{BannerIDs};
    if (ref($banner_ids) eq 'ARRAY' && @$banner_ids) {
        my @ids = sort @$banner_ids;
        if (@ids > 3) {
            @ids = (@ids[0..2], md5int(join " ", @ids));
        }
        push @suffix, @ids;
    }

    if ($inf->{chunk_count}) {
        push @suffix, $inf->{chunk_count}, $inf->{chunk_remainder};
    }

    return @suffix ? join('_', $inf->{OrderID}, @suffix) : $inf->{OrderID};
}

# The task's GroupExportIDs, sorted and joined with '_';
# the empty string when the task has none.
sub group_export_id :CACHE {
    my $self = shift;
    my $ids = $self->taskinf->{GroupExportIDs};
    return '' unless ref($ids) eq 'ARRAY' && @$ids;
    return join('_', sort @$ids);
}

# Reads the export_offers_info column of the task's row and decodes it from
# JSON. Returns an empty hash reference when the column is empty or cannot
# be decoded.
sub get_export_offers_info {
    my ($self) = @_;

    my $row = $self->dbtable->Get($self->task_id);
    my $json_text = $row->{export_offers_info};
    return {} unless $json_text;

    my $decoded = eval { from_json($json_text) };
    return $@ ? {} : $decoded;
}

# Stores $info, encoded as JSON, in the export_offers_info column of the
# task's row and logs the update.
sub set_export_offers_info {
    my ($self, $info) = @_;

    my $table = $self->dbtable;
    my $task_id = $self->task_id;
    $table->Edit($task_id, { export_offers_info => to_json($info) });

    return $self->proj->log("export_offers_info was updated!");
}


# Makes sure the task has a row in the task log table.
# When no row exists for the task id, one is added with the task's ids
# (each id list sorted and joined with spaces), the current time as
# firsttime and the values from get_default_params.
sub check_db_obj {
    my ($self) = @_;

    my $table = $self->dbtable;
    my $task_id = $self->task_id;
    my $row = $table->Get($task_id);

    unless ($row) {
        my $inf = $self->taskinf;
        # Sorted, space-separated form of an optional id list.
        my $joined = sub { return join(' ', sort @{$_[0] || []}) };

        $table->Add({
            TaskID => $task_id,
            OrderID => $self->taskinf->{OrderID},
            BannerIDs => $joined->($inf->{BannerIDs}),
            ParentExportIDs => $joined->($inf->{ParentExportIDs}),
            GroupExportIDs  => $joined->($inf->{GroupExportIDs}),
            firsttime => $self->curtime,
            %{$self->get_default_params}
        });
    }
}

# Makes sure the task's domain has a row in the per-domain table.
# When no row exists for the domain, a row holding only the Domain key
# is added.
sub check_domain_db_obj {
    my ($self) = @_;

    my $table = $self->domain_dbtable;
    my $domain = $self->domain;
    my $row = $table->Get($domain);

    unless ($row) {
        $table->Add({
            Domain => $domain,
        });
    }
}

# Default column values for a newly created task row.
# This base implementation provides none (a fresh empty hash reference).
sub get_default_params {
    my $self = shift;
    my %defaults;
    return \%defaults;
}

# Stores parameter values in the task's row of the log table.
# Parameters:
#   $h - hash reference: column name => value
# The row is created first if it does not exist (check_db_obj).
# Does nothing (apart from logging) when the task has no task_id.
sub set_params {
    my ($self, $h) = @_;
    my $proj = $self->proj;

    $proj->log('set_params task_id: '.$self->task_id);
    unless ($self->task_id) {
        $proj->log('set_params: bad task_id');
        return;
    }

    $self->check_db_obj;
    my $table = $self->dbtable;
    $proj->log('set_params data: '.Dumper($h));
    return $table->Edit( $self->task_id, $h );
}

# Reads one parameter from the task's row of the log table.
# Returns the empty string when the task has no row.
sub get_param {
    my ($self, $prm) = @_;
    my $row = $self->dbtable->Get($self->task_id);
    return $row ? $row->{$prm} : '';
}

# Stores parameter values in the domain's row of the per-domain table.
# The row is created first if it does not exist (check_domain_db_obj).
sub set_domain_params {
    my ($self, $h) = @_;
    $self->check_domain_db_obj;
    my $table = $self->domain_dbtable;
    return $table->Edit( $self->domain, $h );
}

# Reads one parameter from the domain's row of the per-domain table.
# Returns the empty string when the domain has no row.
sub get_domain_param {
    my ($self, $prm) = @_;
    my $row = $self->domain_dbtable->Get($self->domain);
    return $row ? $row->{$prm} : '';
}


########################################################
# / Логирование в базе
########################################################

# Key of a source for the given URL: the result of get_sec_level_domain
# applied to the punycode form of the URL.
sub get_source_key {
    my $self = shift;
    my $url = shift;
    return get_sec_level_domain(url_to_punycode($url));
}

# Finds the most recent file of a given kind in the task directory.
# Parameters:
#   $fileformat - regular expression matched against the end of the file name
# Returns the full path of the matching file whose name is greatest in
# string order, or nothing when no file matches.
# NOTE(review): "most recent" assumes that file names start with a
# fixed-width time prefix (see fileformat) — confirm.
# Dies when the task directory cannot be opened.
sub find_last_format_file {
    my ($self, $fileformat) = @_;

    my $task_dir = $self->dir;
    opendir(my $dh, $task_dir) or die "ERROR: Cannot open directory: $!";
    my @matching = grep { /$fileformat$/ } readdir $dh;
    closedir $dh;

    # readdir also lists '.' and '..', so emptiness has to be checked on the
    # matching names, not on the raw listing.
    return unless @matching;

    # File names are strings: compare them as strings, not as numbers.
    return $task_dir . '/' . List::Util::maxstr(@matching);
}

# Downloads the rows of a YT table for one key into a local file.
# Parameters:
#   $yt_path          - path of the YT table
#   $source_file      - local file to create
#   $sec_level_domain - table key; nothing is downloaded without it
#   %params           - format (default 'dsv'), columns (optional)
# A YT client is created for every cluster listed in the yt_clusters option.
# Up to three attempts are made; within an attempt the clusters are tried in
# turn until one succeeds. Between failed attempts the sub sleeps, starting
# with 60 seconds and doubling each time.
# The data is first written to "<source_file>_DOWNLOADING" and then moved.
# Returns a true value on success, a false one otherwise.
sub download_source_file {
    my ($self, $yt_path, $source_file, $sec_level_domain, %params) = @_;
    my $columns = $params{columns};
    my $format = $params{format} // 'dsv';

    my $proj = $self->proj;
    if (!$sec_level_domain) {
        $proj->log("Can't download without domain");
        return;
    }

    my %format_options;
    $format_options{columns} = $columns if $columns;

    # One client per configured cluster.
    my @yt_clients = map {
        my $cluster = $_;
        my $client_params = dclone($proj->options->{yt_client_params});
        $client_params->{params}->{$_} = $cluster->{$_} for ("cluster", "proxy");
        $proj->get_yt_client(%{$client_params});
    } @{ $proj->options->{yt_clusters} };

    my $downloading_file = $source_file . "_DOWNLOADING";
    my $attempts_left = 3;
    my $pause_sec = 60;
    my $downloaded;
    while (!$downloaded && $attempts_left > 0) {
        $proj->log("try to download");
        for my $yt_client (@yt_clients) {
            last if $downloaded;
            eval {
                $yt_client->read_table_to_file(
                    $yt_path,
                    $downloading_file,
                    $format,
                    format_options => \%format_options,
                    key            => $sec_level_domain,
                );
                $proj->do_sys_cmd("mv $downloading_file $source_file");
            };
            $downloaded = ($@ || (! -f $source_file)) ? 0 : 1;
        }
        $attempts_left--;
        next if $downloaded || $attempts_left <= 0;

        $proj->log("downloading failed, sleep $pause_sec seconds");
        sleep $pause_sec;
        $pause_sec *= 2;
    }

    $proj->log($downloaded ? "downloading successful" : "downloading has been failed");
    return $downloaded;
}

# Checks whether the local file of a source is outdated and, if needed,
# downloads it from YT again and adds fields to the feed.
#
# Parameters:
# name               - name of the source (used in the log and in the file name)
# yt_path            - path of the YT table
# sec_level_domain   - key for the table (second-level domain)
# format             - format of the YT table ('dsv' by default)
# columns            - columns to download
# add_str            - string to prepend to every row after downloading (some common
#                      information, e.g. the letter of the source)
# to_taskdir         - whether to download into the task directory (if 0 or not given, the
#                      shared dyn_sources directory is used). When downloading into the task
#                      directory, the time is added to the file name (so the cache does not
#                      work, which is fine because tasks run at most once a day anyway)
# directory_path     - download into the given directory
# add_merge_keys     - also build a "<file>_merge_keys" companion via add_merge_keys and
#                      return its name instead of the source file name
# force              - download in any case
#
# name, yt_path and sec_level_domain are required (the sub dies without them);
# format falls back to 'dsv'.
#
# Returns the name of the source file (or of the merge-keys file, see above).
sub get_source_file {
    my ($self, %prm) = @_;

    $prm{format} //= 'dsv';
    my $name = $prm{name};
    my $sec_level_domain = $prm{sec_level_domain};
    my $add_str = $prm{add_str};

    die "get_source_file called without required params" unless $name && $sec_level_domain && $prm{yt_path};

    # Target directory: task directory, explicit directory, or the shared one.
    my $dir;
    if ($prm{to_taskdir}) {
        $dir = $self->dir;
    } elsif ($prm{directory_path}) {
        $dir = $prm{directory_path};
    } else {
        $dir = $self->domain_dir . '/dyn_sources';
    }

    (-d $_ || mkdir $_) for $dir;
    # The lock is taken for every directory except the task's own one.
    # NOTE(review): the lock is not released if anything below dies — confirm
    # how wait_for_file_lock/release_file_lock behave in that case.
    wait_for_file_lock("$dir/sources.lock", filename => 1) unless $prm{to_taskdir};
    my $proj = $self->proj;
    $proj->log("getting source $name");

    my $filetime = '';
    if ($prm{to_taskdir}) {
        $filetime = $self->filebegtime . '_';
    }
    my $source_filename = "$dir/$filetime${name}_$sec_level_domain";
    # NOTE(review): without /g this replaces only the first dot of the whole
    # path, directory part included — confirm this is intended.
    $source_filename =~ s/\./_/;

    my $merge_keys_filename = "";
    if ($prm{add_merge_keys}) {
        $merge_keys_filename = $source_filename . '_merge_keys';
    }

    # Take into account that the format may have changed: the parameters of
    # the previous download are kept in a .conf file next to the data.
    my $config_file = "$source_filename.conf";
    my $old_config = (-f $config_file) ? load_json($config_file) : undef;
    my @config_params = qw(yt_path format columns add_str);
    my %new_config = map { $_ => $prm{$_} } grep { defined $prm{$_} } @config_params;

    # The file is reused when it exists, is younger than 24 hours, was
    # downloaded with the same parameters and no forced download is requested.
    if (-f $source_filename
          and time - mtime($source_filename) < 3600 * 24
          and $old_config
          and to_json($old_config, {canonical=>1}) eq to_json(\%new_config, {canonical=>1})
          and !$prm{force}
    ) {
        $proj->log("source $name is not outdated");
    } else {
        $proj->log("source $name is outdated, downloading...");
        my $tmp_file = "${source_filename}_tmp";
        if ($self->download_source_file($prm{yt_path}, $tmp_file, $sec_level_domain, %prm)) {
            if ($add_str) {
                # Prepend add_str to every downloaded row.
                # NOTE(review): the results of these opens are not checked.
                my $new_tmp_file = "$tmp_file.add_str";
                open(my $fin, '<', $tmp_file);
                open(my $fout, '>', $new_tmp_file);
                while (defined(my $row = <$fin>)) {
                    print $fout "$add_str$row";
                }
                close($fin);
                close($fout);
                unlink($tmp_file);
                $tmp_file = $new_tmp_file;
            }
            $proj->do_sys_cmd("mv $tmp_file $source_filename");
            save_json(\%new_config, $config_file);
            $proj->log("created `$source_filename'; config saved to `$config_file': " . to_json(\%new_config));
        }
    }
    if (-f $source_filename) {
        $proj->log("source $name: " . $proj->file($source_filename)->wc_l . " rows");
    } else {
        $proj->log("source $name not exists");
    }

    # (Re)build the merge-keys file when it is missing or older than the source.
    if ( $prm{add_merge_keys} and
        -f $source_filename and !(
            -f $merge_keys_filename and
            mtime($source_filename) <= mtime($merge_keys_filename)
        )
    ) {
        $self->add_merge_keys(
            $source_filename,
            $merge_keys_filename,
            dont_keep_prev => 1,
        );
    }

    release_file_lock("$dir/sources.lock", filename => 1) unless $prm{to_taskdir};
    if ( $prm{add_merge_keys} ) {
        if ( -f $merge_keys_filename ) {
            $proj->log("source $name with merge keys: " . $proj->file($merge_keys_filename)->wc_l . " rows");
        }
        else {
            $proj->log("source $name with merge keys not exists");
        }
        return $merge_keys_filename;
    }
    return $source_filename;
}

# Table of domains whose URLs lose their GET parameters in prepare_url_key
# (hash reference: domain => true). This base implementation has none and
# returns undef.
sub domains_remove_get_params {
    my ($self) = @_;
    return undef;
}

# Builds the normalised form of a URL that serves as a merge key.
# Steps: decode the URL (punycode, urlencode to utf8); drop the GET
# parameters for the domains listed by domains_remove_get_params; normalise
# and canonicalise; take the page's fixed_url and then its norm_url;
# canonicalise once more.
sub prepare_url_key {
    my ($self, $url) = @_;

    # punicode, urlencode to utf8
    $url = url_decode_safe($url);

    my $offer_domain = url_to_domain($url);

    my $strip_params_for = $self->domains_remove_get_params;
    if ( defined($strip_params_for) && $strip_params_for->{$offer_domain} ) {
        $url = remove_url_get_params($url);
    }

    my $proj = $self->proj;
    $url = $proj->page(
        $self->proj->page( canonical_url(normalize_url($url)) )->fixed_url
    )->norm_url;

    return canonical_url($url);
}

# Returns the funnel hash kept in the context for the product's offer
# source, creating an empty one on first use.
sub get_funnel_for_pt {
    my ($self, $pt, $ctx) = @_;
    my $source = $pt->offer_source;
    $ctx->{funnel_for_source}{$source} //= {};
    return $ctx->{funnel_for_source}{$source};
}

# Common part of processing external products in dyn and perf.
# Parameters:
#   $pt             native, main product
#   ctx =>          (global) context
#   ext_data =>     list of hashes {source=>..,ptl=>..}
#   (the remaining ppar parameters may be passed on to postprocess...)
# Returns the pair ($arr, $rpc): the banners collected from the external
# products and the pending rpc items of the calls that have not produced
# banners yet.
sub process_offer_generate_external {
    my ($self, $pt, %ppar) = @_;

    my (@arr, @rpc);
    my $sub_ctx = ($ppar{ctx}{generate_external} //= {});
    my $timer = $ppar{ctx}{timer} // $self->proj->get_new_timer;
    $timer->time('generate:ext');
    my $banners_method = $self->banners_method_name;

    my @ext_data = @{$ppar{ext_data} // []};

    for my $ext_idx (0 .. $#ext_data) {
        my $ext_h = $ext_data[$ext_idx];

        my @source_banners;
        my @ptl = @{$ext_h->{ptl} // []};
        for my $pt_idx (0 .. $#ptl) {
            # Every product gets its own computation context; the order of
            # products and sources must stay stable, because it forms the key.
            my $call_id  = "ext$ext_idx-pt$pt_idx";
            my $call_ctx = ($sub_ctx->{$call_id} //= {});
            next unless ref($call_ctx);  # $call_ctx = 'done'

            my ($banners, $pending) = $ptl[$pt_idx]->$banners_method(
                main_pt => $pt, ctx => $call_ctx, %{$self->get_banners_method_par($pt)}
            );
            if ($banners) {
                $sub_ctx->{$call_id} = 'done';
                push @source_banners, @$banners;
            } else {
                push @rpc, @$pending;
            }
        }
        my $processed_arr = $self->postprocess_external($pt, \@source_banners, $ext_h->{source}, %ppar);
        push @arr, @$processed_arr;
    }

    return (\@arr, \@rpc);
}

# Copies fields from the source description into the banners made from
# external products: 'letter' is always set; 'match_type' is set only when
# the source defines it and the banner does not have one yet.
# Modifies the banners in place and returns the same array reference.
sub postprocess_external {
    my ($self, $pt, $arr, $source_info, %ppar) = @_;

    my $source_match_type = $source_info->{match_type};
    for my $banner (@$arr) {
        $banner->{letter} = $source_info->{letter};
        next unless $source_match_type;
        $banner->{match_type} //= $source_match_type;
    }
    return $arr;
}

# Collects the simdistance values banned for this task, from the
# simdistance_filter option: the lists under ban_by_task_id (for the task's
# id) and under ban_by_order_id (for the task's OrderID).
# Returns a hash reference: banned value => 1.
sub get_banned_simdistances: CACHE {
    my ($self) = @_;
    my %banned;

    my $task_id = $self->task_id;
    my $order_id = $self->taskinf->{OrderID};

    my $filter_options = $self->proj->options->{simdistance_filter};

    # (option section, id to look up, word used in the log line)
    my @rules = (
        [ ban_by_task_id  => $task_id,  'task'  ],
        [ ban_by_order_id => $order_id, 'order' ],
    );
    for my $rule (@rules) {
        my ($section, $id, $label) = @$rule;
        next unless $filter_options->{$section}->{$id};

        my @values = @{$filter_options->{$section}->{$id}};
        $banned{$_} = 1 for @values;
        $self->proj->log($label . ' have banned simdistance:'. join(',', @values));
    }

    return \%banned;
}

# Refuses to run a task that carries one of the options which are no longer
# supported ('dbg', 'test'): dies with an explanation for the first such
# option found. Returns 1 otherwise.
sub check_deprecated_options {
    my ($self) = @_;

    my @present = grep { $self->{$_} } ('dbg', 'test');
    if (@present) {
        my $option = $present[0];
        die join("\n",
            "Your task of type '" . ref($self) . "' has option '$option', which is not supported anymore. It does not do what you expect it to do.",
            "This exception is thrown to prevent any possible harm from running the code which relies on its old behavior.",
        );
    }

    return 1;
}

# Tells whether a title source may be used.
# Returns 0 when the title source is banned for the given domain
# (domain_banned_title_source option) or for the task's order
# (order_banned_title_source option), and 1 otherwise.
sub check_banned_title_source {
    my ($self, $title_source, $domain) = @_;

    my $by_domain = $self->proj->options->{domain_banned_title_source};
    return 0 if $by_domain->{$domain} && $by_domain->{$domain}->{$title_source};

    my $order_id = $self->OrderID;
    my $by_order = $self->proj->options->{order_banned_title_source};
    return 0 if $by_order->{$order_id} && $by_order->{$order_id}->{$title_source};

    return 1;
}

# Tells whether the task's order is listed in the nolimit_orders option
# (the result of in_array for the OrderID and that list).
sub check_if_nolimit_order {
    my $self = shift;
    my $order_id = $self->OrderID;
    my $nolimit_orders = $self->proj->options->{nolimit_orders};
    return in_array($order_id, $nolimit_orders);
}

# Limit on the number of phrases per banner for this task's type, taken
# from the max_phrases_per_banner option.
sub get_max_phrases_per_banner :CACHE {
    my ($self) = @_;
    my $options = $self->proj->options;
    return $options->{max_phrases_per_banner}{$self->get_task_type};
}

# Selects the top phrases by a numeric field.
# Parameters:
#   $phrases   - array reference of phrase hashes
#   limit    => size of the top (required, must be true)
#   sort_by  => name of the field the top is taken by (required)
#   group_by => optional field name; when given, the top is taken
#               separately inside every group of phrases sharing its value
#               (an undefined value counts as the empty string)
# Within a group that exceeds the limit, phrases are ordered by descending
# sort_by value and then by ascending Text; smaller groups keep their input
# order. Groups are emitted in the order of their first appearance in the
# input, so the result is deterministic.
# Returns a reference to a new array. Dies when limit or sort_by is missing.
sub get_top_phrases {
    my ($self, $phrases, %par) = @_;

    if (!$par{limit} or !$par{sort_by}) {
        die "Define correct 'limit' and 'sort_by' args!";
    }
    my ($limit, $sort_by, $group_by) = @par{qw(limit sort_by group_by)};

    # Split into groups, remembering the order in which the groups appear.
    my (%group, @group_order);
    for my $h (@$phrases) {
        my $key = (defined $group_by) ? ($h->{$group_by} // '') : '';
        push @group_order, $key if !exists $group{$key};
        push @{$group{$key}}, $h;
    }

    my @result;
    for my $key (@group_order) {
        my @arr = @{$group{$key}};
        if (@arr > $limit) {
            @arr = sort { $b->{$sort_by} <=> $a->{$sort_by} or $a->{Text} cmp $b->{Text} } @arr;
            splice(@arr, $limit);
        }
        push @result, @arr;
    }
    return \@result;
}

# Removes duplicate phrases: two phrases are duplicates when both their
# Text and their PhraseID coincide. The first occurrence is kept and the
# input order is preserved. Returns a reference to a new array.
sub filter_phrase_duplicates {
    my ($self, $phrases) = @_;

    my %seen;
    my @unique = grep { !$seen{ join($;, $_->{Text}, $_->{PhraseID}) }++ } @$phrases;
    return \@unique;
}

# Settings of the mapping / generation / offer-export actions.
#   timeout      => the action is redone when more time than this has passed
#   svn_revision => taken from a separate file (get_svn_revision_file),
#                   which is updated by the "red button"; present only for
#                   the actions the file has a true value for
# get_svn_revision_file is overridden for dyn/perf, hence DERIVED.
sub action_settings :GLOBALCACHE("DERIVED") {
    my ($self) = @_;

    my @actions = qw(mapping gen export_offers);
    my %conf = map { ($_ => { timeout => 3600 * 24 * 3 }) } @actions;

    my $file = $self->get_svn_revision_file;
    my $svn_conf = load_json($file, { relaxed => 1 }) // {};
    for my $action (@actions) {
        next unless $svn_conf->{$action};
        $conf{$action}{svn_revision} = $svn_conf->{$action};
    }
    return \%conf;
}

# Combined MD5 of the feeds' light tskv offer files
# (fds->offers_tskv_light_file of every feed), as computed by get_files_md5.
sub get_feeds_md5 {
    my ($self, $feeds) = @_;
    my @files = map { $_->fds->offers_tskv_light_file } @$feeds;
    return get_files_md5(@files);
}

# Converts per-source offer counters into per-source funnel records.
# Counter -> funnel field:
#   total                   -> offers_in_tskv
#   read                    -> offers_read_from_tskv
#   checked_required_fields -> offers_required_fields_ok
#   output                  -> offers_after_targets_total
# A missing counter becomes 0. Returns a hash reference keyed by source.
sub get_funnel_from_offers_count {
    my ($self, $offers_count) = @_;

    my %counter_for = (
        offers_in_tskv             => 'total',
        offers_read_from_tskv      => 'read',
        offers_required_fields_ok  => 'checked_required_fields',
        offers_after_targets_total => 'output',
    );

    my %funnel_for_source;
    for my $offer_source (keys %$offers_count) {
        my $counts = $offers_count->{$offer_source};
        my %funnel = map { ($_ => ($counts->{ $counter_for{$_} } // 0)) } keys %counter_for;
        $funnel_for_source{$offer_source} = \%funnel;
    }
    return \%funnel_for_source;
}

# Data written to the database after the offers export.
# Parameters:
#   $export_info  -  hashref with the {funnel_for_source} field
#                    (offer source => funnel hash, see get_funnel_from_offers_count)
# Returns: hashref with begin, end, stagelog, offerscount, srccount, FunnelInfo.
sub get_export_offers_resparams {
    my ($self, $export_info) = @_;

    my $end = $self->curtime;
    my @funnels = values %{ $export_info->{funnel_for_source} // {} };

    my $offers_after_targets_total = sum(0, map { $_->{offers_after_targets_total} // 0 } @funnels);

    # get_funnel_from_offers_count stores the tskv total under offers_in_tskv;
    # the previously read offers_total_in_tskv key is kept as a fallback only.
    my $offers_in_tskv = sum(0, map { $_->{offers_in_tskv} // $_->{offers_total_in_tskv} // 0 } @funnels);
    my $offers_required_fields_ok = sum(0, map { $_->{offers_required_fields_ok} // 0 } @funnels);

    return +{
        begin       => $self->proj->dates->trdate('sec', 'db_time', $self->{_begin_time}),
        end         => $end,
        stagelog    => $self->get_stagelog,
        offerscount => $offers_in_tskv,
        srccount    => $offers_required_fields_ok,
        FunnelInfo  => "Exported $offers_after_targets_total offers at $end; see YT generation for funnel",
    };
}

# Filters the feed and generates products via next_ptl_pack, exports the task and the offers.
# Checks whether the feed/task has changed, creates the file with products
# (see create_export_offers_file) and updates export_offers_info.
# Parameters:
#   feeds  =>  feeds (may be an empty list)
#   copy_from =>  file to copy the offers from (when feeds is not given)
#   local_output  =>  file to write the result to; no checks are made, create_export_offers_file is called right away
# The remaining parameters are passed through to create_export_offers_file.
# Returns:
#   hashref { file_export_offers, export_info } when an export file was produced;
#   nothing (bare return) when no export was required.
sub do_export_offers {
    my $self = shift;
    my %opts = (
        timeout => 3600 * 24 * 2,  # Cap the run time since it can be large (especially for Chinese clients, where we visit the landing page)
        @_,
    );
    my $feeds = $opts{feeds};
    my $orig = $opts{copy_from};
    my $orig_info;
    if ($orig) {
        # the task row (carrying export_offers_info) is read from the last line of the source file
        my $last_line = $self->proj->read_sys_cmd("tail -n 1 $orig");
        my $task_row = parse_tskv_line($last_line);
        $orig_info = from_json($task_row->{export_offers_info});
    }
    
    # For some clients the number of offers per task is limited
    my $offers_count_limit = $self->proj->options->{client_offers_in_task_limit}->{$self->taskinf->{ClientID}};
    $opts{offers_count_limit} = $offers_count_limit if $offers_count_limit;

    $self->log("do_export_offers ...");
    my $proj = $self->proj;

    if ($opts{local_output}) {
        # local mode: write straight to the requested file, skipping all delta checks
        my $file = $opts{local_output};
        my $info = $self->create_export_offers_file($feeds, $file, %opts);
        return +{
            file_export_offers => $file,
            export_info => $info,
        };
    }

    my $export_info = $self->get_export_offers_info;
    my $task_md5 = $self->clean_md5;
    my $feed_md5 = (defined $feeds) ? $self->get_feeds_md5($feeds) : $orig_info->{feed_md5};

    my @export_reasons;
    push @export_reasons, 'forced:env' if $ENV{BANNERLAND_FORCE_EXPORT_OFFERS};

    # regular reasons for a re-export:
    push @export_reasons, 'task_md5' if ($export_info->{task_md5} // '') ne $task_md5;
    push @export_reasons, 'timeout' if (time - ($export_info->{timestamp} // 0)) > $self->action_settings->{export_offers}{timeout};

    push @export_reasons, 'svn_revision' if ($export_info->{svn_revision} // -1) < ($self->action_settings->{export_offers}{svn_revision} // 0);

    # export_reasons may additionally get product_md5_md5 (the products have changed),
    # but only when the feed has changed; check whether product_md5_md5 must be recomputed
    my $new_check_required = (($export_info->{last_checked_feed_md5} // '') ne $feed_md5);

    if (!@export_reasons and !$new_check_required) {
        $self->log("delta_gen: export_offers not required: no reason");
        return;
    }

    my $temp_file = $self->get_tempfile("export_offers", DIR => $self->dir, UNLINK => 1);

    my $new_export_info;
    if (defined $feeds) {
        $self->log("create export_offers from feed ...");
        $new_export_info = $self->create_export_offers_file($feeds, $temp_file, %opts);
    } else {
        $self->log("copy export_offers from file `$orig' ...");
        $new_export_info = $self->copy_export_offers_file($orig, $temp_file);
    }

    if ($new_export_info->{product_md5_md5} ne ($export_info->{product_md5_md5} // '')) {
        $proj->log("delta_gen: product list changed!");
        push @export_reasons, 'product_md5_md5';
    } else {
        $proj->log("delta_gen: product list not changed!");
    }

    $proj->log("delta_gen: export_offers reasons: @export_reasons");
    if (!@export_reasons) {
        # nothing is exported, so the old export_info is kept,
        # but its {last_checked_feed_md5} field is updated because a new check has been made!
        $export_info->{last_checked_feed_md5} = $feed_md5;
        $self->set_export_offers_info($export_info);
        $proj->log("export_offers_info: last_checked_feed_md5 was updated!");
        return;
    }

    my $export_dir = $self->get_export_offers_dir;
    if (!-d $export_dir) {
        mkdir $export_dir
            or die "Can't create export dir `$export_dir': $!";
    }

    my $export_file = $export_dir.'/'.join('-', $self->task_id, $new_export_info->{timestamp});

    # keep a copy of export_offers in the task directory (for logging)
    my $export_file_local = $self->fileformat("export_offers"); # export_offers in task dir
    File::Copy::copy("$temp_file", "$export_file_local")
        or die "Can't copy export_file to `$export_file_local': $!";

    rename($temp_file, $export_file)
        or die "Can't move export_file to `$export_file': $!";

    $self->set_export_offers_info($new_export_info);

    $self->log("do_export_offers done, created `$export_file'");
    return +{
        file_export_offers => $export_file,
        export_info => $new_export_info,
    };
}

# Writes the export_offers file: one tskv row per product, followed by a final task row.
# Parameters:
#   $feeds  -  arrayref of feeds
#   $output_file  -  path of the file to write
# Options read here:
#   timeout  =>  seconds; when exceeded, iteration stops and what has been written so far is kept
#   offers_count_limit  =>  stop after this many offers have been written
#   check_required_fields  =>  passed through to next_ptl_pack
# Returns: hashref with the export info (task_md5, feed_md5, timestamp, svn_revision,
#   last_checked_feed_md5, funnel_for_source, timings, product_md5_md5, ...);
#   the same data is serialized into the export_offers_info field of the task row.
# Dies when the output file cannot be opened or closed.
sub create_export_offers_file {
    my $self = shift;
    my $feeds = shift;

    my $output_file = shift;
    my %opts = @_;

    my $proj = $self->proj;
    my $start_time = time();

    my $feed_md5 = $self->get_feeds_md5($feeds);
    my %export_info = (
        task_md5 => $self->clean_md5,
        feed_md5 => $feed_md5,
        timestamp => $start_time,
        svn_revision => $svn_revision,
    );

    open my $fh, '>', $output_file
        or die "Can't open output export file: $!";

    my %task_row = (
        task_id     => $self->task_id,
        timestamp   => $start_time,
        task_inf    => to_json($self->taskinf),
        ppar        => to_json({}),
        AllowedInProductGallery => $self->taskinf->{AllowedInProductGallery} || "true",
        ($self->datacamp_business_id ? (BusinessId => $self->datacamp_business_id): ()),
        ($self->datacamp_shop_id ? (ShopId => $self->datacamp_shop_id): ()),
    );

    my $pack_size = $self->get_ptl_pack_size;
    my $offers_count = {};
    my $timeout = $opts{timeout};
    my $offers_count_limit = $opts{offers_count_limit};
    if (defined $timeout) {
        $self->log("export_offers: timeout: $timeout");
    }
    my $is_need_force_exit;
    my $offers_cnt = 0;
    my @pt_md5;
    my $fd_cnt = 0;
    my $fd_tot = @$feeds;
    FEED: for my $fd (@$feeds) {
        $fd->iter_init;
        $fd_cnt++;
        my $offers_tot = $fd->get_total_offers_count;
        # expected number of packs, rounded up (used only for progress logging)
        my $pack_tot = int($offers_tot / $pack_size) + (($offers_tot % $pack_size == 0) ? 0 : 1);
        my $pack_cnt = 0;
        PTL: while (defined(my $ptl = $fd->next_ptl_pack($pack_size, check_required_fields => $opts{check_required_fields}, init_specific_product_fields => 1))) {
            $pack_cnt++;
            PT: for my $pt (@$ptl) {
                my $product_md5 = $pt->clean_md5;  # compute the md5 first so that it gets into the serialized version
                push @pt_md5, $product_md5;
                # %row must not contain task_inf -- that is what distinguishes an offer row from the task row
                my %row = (
                    task_id       => $self->task_id,
                    timestamp     => $start_time,
                    product_class => ref($pt),
                    product_inf   => to_json($pt->FREEZE), # caches are saved as well
                    product_md5   => $product_md5,
                    ($pt->{offerYabsId} ? (OfferYabsId => $pt->{offerYabsId}): ()),
                );
                print $fh join("\t", map { $_.'='.encode_tsv($row{$_}) } sort keys %row), "\n";
                $offers_cnt += 1;
                # NOTE(review): the check runs after the row is written and uses `>',
                # so offers_count_limit + 1 rows are written before exiting -- confirm this is intended
                if ($offers_count_limit && $offers_cnt > $offers_count_limit) {
                    $self->log("export_offers: offers_count_limit, force exit");
                    $is_need_force_exit = 1;
                    last PTL;
                }

                if(defined $timeout and time() - $start_time > $timeout){ # stop when the timeout is exceeded
                    # checked here because going through 1000 (next_ptl_pack size) landings may take quite long
                    $self->log("export_offers: timeout force exit from ptl iter");
                    last PT;  # save offers_count
                }
            }
            my $ocnt = $fd->get_iter_offers_count;
            my $read_count = sum(0, map { $_->{read} // 0 } values %$ocnt);
            my $out_count = sum(0, map { $_->{output} // 0 } values %$ocnt);
            $self->log("export_offers: feed $fd_cnt/$fd_tot; pack $pack_cnt/$pack_tot; offers (tot/read/out): $offers_tot => $read_count => $out_count");

            if (defined $timeout and time() - $start_time > $timeout){ # checked again here, since everything could have been filtered out in the loop above
                $self->log("export_offers: finished due to timeout");
                $is_need_force_exit = 1;
                last PTL;
            }

            # less than 4 hours left before the timeout: switch to packs of 1000
            if (defined $timeout and (time() - $start_time) > ($timeout - 4 * 3600)) {
                $pack_size = 1000;
                $pack_tot = $pack_cnt + int(($offers_tot - $read_count) / $pack_size);
                $self->log("export_offers: timeout is close, reduce pack size to $pack_size");
            }
        }
        $fd->iter_finalize;
        # accumulate the per-source counters of this feed into the overall counters
        patch_hash($offers_count, $fd->get_iter_offers_count, clone_source => 0, add => 1);
        last FEED if $is_need_force_exit;
    }

    $export_info{last_checked_feed_md5} = $feed_md5;
    my $feed_info = load_json($self->info_file_feed);

    if ($self->feedurl) {
        $export_info{main_feed_data_type} = $feed_info->{feed_data_type};
        $export_info{main_feed_file_type} = $feed_info->{feed_file_type};
    }
    if ($self->{_begin_time}) {
        $self->{_timings}->{iron_generation} = {
            start_time => $proj->dates->trdate("sec", "timings", $self->{_begin_time}),
            end_time   => $proj->dates->cur_date('timings'),
            duration   => time() - $self->{_begin_time},
        };
    }

    $export_info{funnel_for_source} = $self->get_funnel_from_offers_count($offers_count);

    $self->{_timings}->{create_export_offers} = {
        start_time => $proj->dates->trdate("sec", "timings", $start_time),
        end_time   => $proj->dates->cur_date('timings'),
        duration   => time() - $start_time,
    };
    # {_timings} always exists at this point (create_export_offers was just stored)
    for my $step (keys %{$self->{_timings}}) {
        $export_info{timings}->{$step} = $self->{_timings}->{$step};
    }
    $export_info{product_md5_md5} = md5_hex(join(' ', sort @pt_md5));  # ascii, no need to call Encode

    $task_row{export_offers_info} = to_json(\%export_info);  # Note: max length of export_offers_info in DB is 2000 VARCHAR

    print $fh join("\t", map { $_.'='.encode_tsv($task_row{$_}) } sort keys %task_row), "\n";
    # buffered write errors (e.g. disk full) surface at close; do not hand over a truncated export
    close $fh
        or die "Can't close output export file `$output_file': $!";

    return \%export_info;
}

# Copies an export_offers file, replacing the task information with that of the current task.
# Offer rows (rows without task_inf) are copied with task_id/timestamp rewritten;
# the task row is rewritten and written last.
# Parameters:
#   $orig    -  source export_offers file
#   $output  -  destination file
# Returns: hashref with the new export info (task_md5, copied_from and timestamp are updated).
# Dies when a file cannot be opened, the source has no task row, or the output cannot be closed.
sub copy_export_offers_file {
    my ($self, $orig, $output) = @_;

    my $start_time = time;

    my $task_row;
    open my $ifh, "<", $orig
        or die "Can't open orig export_offers file `$orig': $!";
    open my $ofh, ">", $output
        or die "Can't open for writing export_offers file `$output': $!";
    # a lexical line variable is used so that the caller's $_ is not clobbered
    while (my $line = <$ifh>) {
        my $row = parse_tskv_line($line);
        if ($row->{task_inf}) {
            $task_row = $row;
        } else {
            $row->{task_id} = $self->task_id;
            $row->{timestamp} = $start_time;
            print $ofh join("\t", map { $_.'='.encode_tsv($row->{$_}) } sort keys %$row), "\n";
        }
    }
    close $ifh;

    # without a task row there is no export_offers_info to start from
    die "No task row (row with task_inf) found in export_offers file `$orig'"
        if !$task_row;

    my $export_info = from_json($task_row->{export_offers_info});
    $export_info->{task_md5} = $self->clean_md5;
    $export_info->{copied_from} = $task_row->{task_id};  # original task id
    $export_info->{timestamp} = $start_time;

    $task_row->{task_id} = $self->task_id;
    $task_row->{task_inf} = to_json($self->taskinf);
    $task_row->{export_offers_info} = to_json($export_info);
    $task_row->{timestamp} = $start_time;

    print $ofh join("\t", map { $_.'='.encode_tsv($task_row->{$_}) } sort keys %$task_row), "\n";
    # buffered write errors surface at close
    close $ofh
        or die "Can't close export_offers file `$output': $!";

    return $export_info;
}

# Runs do_export_offers with an empty feed list; any extra arguments are passed through
# (and may override the feeds option, since they come after it).
sub export_offers_from_empty_feed {
    my ($self, @export_opts) = @_;
    return $self->do_export_offers(feeds => [], @export_opts);
}

# Generates the green_url.
# The product name is used when it is set; otherwise $el->{name} with the first occurrence
# of the dyn_banners_dummy_phrase option removed
# ($el->{name} is filled only for generation from DSE).
# Returns '' when neither name is available.
sub get_green_url {
    my ($self, $pt, $el) = @_;

    if ($pt->name) {
        return $self->proj->phrase( $pt->name )->green_url;
    }

    return '' unless $el->{name};

    my $dummy_phrase = $self->proj->options->{dyn_banners_dummy_phrase};
    (my $name_without_dummy = $el->{name}) =~ s/\Q$dummy_phrase\E//;
    return $self->proj->phrase($name_without_dummy)->green_url;
}

# Returns the task's Resource.Geo as an arrayref:
# as is when it is already an array, otherwise split on commas/whitespace.
sub geo_list :CACHE {
    my ($self) = @_;
    my $geo = $self->taskinf->{Resource}{Geo};
    return $geo if ref($geo) eq 'ARRAY';
    return [split /[,\s]+/, $geo];
}

# Returns an arrayref with the result of Utils::Regions::geobase_subtree for geo_list.
sub geo_subtree :CACHE {
    my ($self) = @_;
    my @subtree = Utils::Regions::geobase_subtree($self->geo_list);
    return \@subtree;
}

# DYNSMART-491: custom phrases from the feed are enabled for some clients.
# Reads the PerfAllowCustomPhrases table (from a db dump when $ENV{MR_BROADMATCH} is set,
# otherwise via bannerland_dbh).
# Returns: hashref OrderID => { OnlyClientPhrases, MaxPhrasesPerOffer }.
sub custom_phrases_settings :GLOBALCACHE {
    my ($self) = @_;
    my $proj = $self->proj;

    my $dbt = $ENV{MR_BROADMATCH}
        ? $proj->elem_factory->elem_from_db_dump('bannerland', 'PerfAllowCustomPhrases')
        : $proj->dbtable('PerfAllowCustomPhrases', undef, 'bannerland_dbh');

    my %settings_for_order;
    for my $row (@{$dbt->List}) {
        $settings_for_order{ $row->{OrderID} } = {
            OnlyClientPhrases  => $row->{OnlyClientPhrases},
            MaxPhrasesPerOffer => $row->{MaxPhrasesPerOffer},
        };
    }
    return \%settings_for_order;
}


# Fills the custom-phrases fields of the task object from custom_phrases_settings:
#   custom_phrases_allowed, OnlyClientPhrases and (when allowed) custom_phrases_max_count_per_offer.
# The settings are looked up by the task's OrderID. Always returns 1.
sub load_custom_phrases_settings :CACHE {
    my ($self) = @_;
    my $settings_for_order = $self->custom_phrases_settings;
    my $order_id = $self->taskinf->{OrderID};

    if (!exists $settings_for_order->{$order_id}) {
        # no entry for this order: custom phrases are off
        $self->{custom_phrases_allowed} = 0;
        $self->{OnlyClientPhrases} = 0;
        return 1;
    }

    my $order_settings = $settings_for_order->{$order_id};
    $self->{custom_phrases_allowed} = 1;
    $self->{custom_phrases_max_count_per_offer} = $order_settings->{MaxPhrasesPerOffer} || 1000;
    $self->{OnlyClientPhrases} = $order_settings->{OnlyClientPhrases} || 0;
    $self->proj->log("Custom phrases logic is active, custom_phrases_max_count_per_offer: $self->{custom_phrases_max_count_per_offer}");
    return 1;
}

# Makes sure the feed data is downloaded, under the feed-url file lock.
# If the lock is taken by someone else, retries for up to $num_of_attempts attempts,
# sleeping a minute between them; if the lock is never obtained, nothing is downloaded
# and the previously stored data is left to be used as is.
# The lock is released even when the download dies; the error is then re-thrown.
sub prepare_feeddata {
    my $self = shift;
    my $proj = $self->proj;

    $proj->log("getting feeddata BEG");

    # if the lock is held, knock once a minute until _tskv_mpd is ready
    my $num_of_attempts = 120;
    my $got_feeddata_lock = 0;
    if ( get_file_lock( $self->task_feedurl_lock ) ) {
        $proj->log("got feeddata lock");
        $got_feeddata_lock = 1;
    } else {
        $proj->log("feeddata are locked, waiting for release...");
        for my $attempt_num (1..$num_of_attempts) {
            if ( get_file_lock( $self->task_feedurl_lock ) ) {
                $proj->log("feeddata were released on attempt $attempt_num");
                $got_feeddata_lock = 1;
                last;
            }
            $proj->log("feeddata are locked, attempt $attempt_num...");
            if ( $attempt_num < $num_of_attempts ) {
                sleep(60);
                next;
            }
            # could not get through: the previous tskv will be tried, even if it is expired
            $proj->log("waiting for lock release is over, we will try to use expired tskv instead");
        }
    }

    if ( $got_feeddata_lock ) {
        # run under eval so that the lock is released on failure as well
        my $is_ok = eval {
            if ( $self->is_download_needed ) {
                $proj->log("download_feeddata beg");
                $self->download_feeddata;
                $proj->log("download_feeddata end");
            }
            1;
        };
        my $error = $@;
        release_file_lock( $self->task_feedurl_lock );
        die($error || "prepare_feeddata failed") if !$is_ok;
    }

    $proj->log("getting feeddata END");
}

# Stores the mapped feed data (tskv_mpd file and feed file) in the feeddata directory
# and saves the mapping info, unless get_mapping_reasons finds no reason to redo the mapping.
# Parameters:
#   feed            =>  $fd     -   feed object (default: the feed without filters)
sub download_feeddata {
    my $self = shift;
    my %par  = @_;

    my $feed = $par{feed} // $self->get_feed_nofilters;

    my $proj = $self->proj;
    my $new_info = $self->get_feed_info($feed);
    my $mapping_reasons = $self->get_mapping_reasons($new_info);
    $proj->log("delta_gen: mapping reasons: @$mapping_reasons");
    if (!@$mapping_reasons) {
        $proj->log("delta_gen: mapping not required!");
        return;
    }

    $proj->log("get tskv_mpd file beg");
    my $filetskv_mpd = $self->fileformat_feeddata_begtime("tskv_mpd");
    my $filetskv_mpd_tmp = $feed->offers_tskv_mpd_file->{filename};
    $self->process_merge_keys($filetskv_mpd_tmp, $filetskv_mpd);
    $proj->log("file name: $filetskv_mpd");
    my $tskv_mpd_size = $proj->file( $filetskv_mpd )->size;
    $proj->log("file size: $tskv_mpd_size");
    $proj->log("get tskv_mpd file end");

    $proj->log("get feed file beg");
    my $filefeed = $self->fileformat_feeddata_by_tskv_mpd($filetskv_mpd, "feed");
    $feed->fds->_page_text_file_safe_copy($filefeed . "_tmp");
    $proj->log("file name: ${filefeed}_tmp");
    my $feedsize = $proj->file( "${filefeed}_tmp" )->size;
    $proj->log("file size: $feedsize");
    $self->do_sys_cmd("touch -r $filetskv_mpd ${filefeed}_tmp"); # the feed file must have the same timestamp as tskv_mpd

    $proj->log("get feed file end");
    $self->_tmp2file($filefeed);

    # remember which tskv_mpd file the saved info refers to
    my $info_file = $self->info_file_feed;
    $new_info->{tskv_mpd} = $filetskv_mpd;

    save_json($new_info, $info_file);
    $proj->log("mapping info saved to $info_file: ".to_json($new_info));
}


# Tells whether the feed data must be downloaded again.
# Returns 1 when there is no stored tskv_mpd file or it is more than 4 hours old, 0 otherwise.
sub is_download_needed {
    my $self = shift;
    my $max_age_hours = 4;

    my $last_tskv_mpd_file = $self->load_feed_info->{tskv_mpd};
    if (!$last_tskv_mpd_file) {
        return 1;
    }

    my $hours_expired = $self->proj->file( $last_tskv_mpd_file )->seconds_expired / 3600;
    $self->log("$hours_expired hour(s) expired since creation of $last_tskv_mpd_file");
    return ($hours_expired > $max_age_hours) ? 1 : 0;
}

# Returns the BusinessType field of the task's Resource.
sub get_business_type :CACHE {
    my ($self) = @_;
    my $business_type = $self->taskinf->{Resource}{BusinessType};
    return $business_type;
}

# Builds the parameter hash for creating the task's feed (see get_feed_nofilters).
# Always set: url, filters, last_valid_feed_type, is_new_task (1 when there is no prev_begin param).
# Set when present in the task: login, pass, business_type.
# For datacamp feed urls also: business_id, shop_id, datacamp_orig_feedurl,
# offer_source/force_dc for "site" feeds and force_dc when the url has force_dc=true.
# Returns: a fresh hashref on every call.
sub get_feedparams {
    my $self = shift;
    my $feedurl = $self->feedurl;
    my $filters = $self->filters;
    my $task = $self->taskinf;

    my %feedparams = (
        url     => $feedurl,
        filters => $filters,
    );
    if ($task->{Resource}{Login}) {
        $feedparams{login} = $task->{Resource}{Login};
    }
    if ($task->{Resource}{Password}) {
        $feedparams{pass} = $task->{Resource}{Password};
    }
    if ($self->get_business_type) {
        $feedparams{business_type} = $self->get_business_type;
    }
    $feedparams{last_valid_feed_type} = $task->{Resource}{LastValidFeedType};
    $feedparams{is_new_task} = $self->get_param('prev_begin') ? 0 : 1;

    return \%feedparams unless $self->is_datacamp_feedurl;

    # datacamp-specific parameters
    $feedparams{'business_id'} = $self->datacamp_business_id;
    $feedparams{'shop_id'} = $self->datacamp_shop_id;
    if ($self->datacamp_site) {
        $feedparams{'offer_source'} = 'site';
        $feedparams{'force_dc'} = 1;
    }
    if ($self->get_datacamp_feedurl_param("force_dc") eq 'true') {
        $feedparams{'force_dc'} = 1;
    }
    $feedparams{'datacamp_orig_feedurl'} = $self->datacamp_orig_feedurl;

    return \%feedparams;
}

# Returns the feed object built from get_feedparams with the filters entry removed.
sub get_feed_nofilters {
    my $self = shift;
    my $proj = $self->proj;

    my $params_without_filters = $self->get_feedparams;
    delete $params_without_filters->{filters};

    return $proj->feed($params_without_filters);
}

# Returns the first file from get_feeddata_files("tskv_mpd"), or '' when there are none.
sub get_last_tskv_mpd {
    my $self = shift;
    my @tskv_mpd_files = $self->get_feeddata_files("tskv_mpd");
    return @tskv_mpd_files ? $tskv_mpd_files[0] : '';
}

# Loads the stored feed info (see info_file_feed).
# Returns {} when the task has no feed url or when the tskv_mpd file it refers to does not exist.
sub load_feed_info {
    my $self = shift;
    if (!$self->feedurl) {
        return {};
    }

    my $feed_info = load_json($self->info_file_feed);

    # TODO: remove; temporary fallback until info_file_feed gets populated
    $feed_info->{tskv_mpd} //= $self->get_last_tskv_mpd;

    if (-e $feed_info->{tskv_mpd}) {
        return $feed_info;
    }
    return {};
}

# Lists the feeddata files of this feed url in the feeddata directory.
# Parameters:
#   $type  -  file type suffix (default "*", i.e. any)
# Returns: list of file names sorted by seconds_expired ascending; empty list when nothing matches.
sub get_feeddata_files {
    my ($self, $type) = @_;
    $type ||= "*";

    my @files = glob( $self->dir_feeddata."/*_".$self->feedurl_md5."_$type" );
    if (!@files) {
        return ();
    }

    my $seconds_expired_of = sub { $self->proj->file($_[0])->seconds_expired };
    my @files_by_age = sort { $seconds_expired_of->($a) <=> $seconds_expired_of->($b) } @files;
    return @files_by_age;
}

# Collects the current state of a feed for the mapping delta check.
# Parameters:
#   $feed  -  feed object
# Returns: hashref with md5 of the light offers/categs files, the current timestamp,
#   svn_revision of the mapping action and the feed data/file types.
sub get_feed_info {
    my ($self, $feed) = @_;
    my $proj = $self->proj;

    my %md5_of;
    $md5_of{offers} = $proj->file($feed->fds->offers_tskv_light_file)->md5_hex;
    $md5_of{categs} = $proj->file($feed->fds->categs_tskv_light_file)->md5_hex;

    return {
        offers_tskv_light_md5 => $md5_of{offers},
        categs_tskv_light_md5 => $md5_of{categs},
        timestamp             => time,
        svn_revision          => $self->action_settings->{mapping}{svn_revision},
        feed_data_type        => $feed->fds->{feed_data_type},
        feed_file_type        => $feed->fds->{feed_file_type},
    };
}

# Compares freshly collected feed info with the stored one and lists the reasons to redo the mapping.
# Parameters:
#   $new_info  -  hashref as returned by get_feed_info
# Returns: arrayref of reason names (offers_tskv_light_md5, categs_tskv_light_md5, timeout,
#   svn_revision, forced:env); empty when the mapping is up to date.
sub get_mapping_reasons {
    my ($self, $new_info) = @_;
    # the function is called under the feed lock, so the info file need not be locked
    my $info_file = $self->info_file_feed;
    my $feed_info = load_json($info_file) // {};

    my @mapping_reasons;
    push @mapping_reasons, 'offers_tskv_light_md5' if ($feed_info->{offers_tskv_light_md5} // '') ne $new_info->{offers_tskv_light_md5};
    push @mapping_reasons, 'categs_tskv_light_md5' if ($feed_info->{categs_tskv_light_md5} // '') ne $new_info->{categs_tskv_light_md5};
    push @mapping_reasons, 'timeout' if ($new_info->{timestamp} - ($feed_info->{timestamp} // 0)) > $self->action_settings->{mapping}{timeout};
    # the stored revision is absent on the first run; treat it as 0 (what the numeric
    # comparison of undef yields anyway) instead of comparing undef and warning
    push @mapping_reasons, 'svn_revision' if ($feed_info->{svn_revision} // 0) < ($new_info->{svn_revision} // 0);
    push @mapping_reasons, 'forced:env' if $ENV{BANNERLAND_FORCE_MAPPING};
    return \@mapping_reasons;
}

# Path of the file that stores a hash with information about the feed; fields:
#   offers_tskv_light_md5   =>  md5 of offers_tskv_light_file
#   categs_tskv_light_md5   =>  md5 of categs_tskv_light_file
#   timestamp               =>  time the info was collected
sub info_file_feed {
    my ($self) = @_;
    my $dir_feeddata = $self->dir_feeddata;
    my $feedurl_md5 = $self->feedurl_md5;
    return "$dir_feeddata/info-feed-$feedurl_md5.json";
}

# Derives the name of a feeddata file of another type from a tskv_mpd file name
# by replacing the trailing "_tskv_mpd" with "_$type".
# A name without that suffix is returned unchanged.
sub fileformat_feeddata_by_tskv_mpd {
    my ($self, $filetskv_mpd_feeddata, $type) = @_;
    (my $typed_file = $filetskv_mpd_feeddata) =~ s/_tskv_mpd$/_$type/;
    return $typed_file;
}

# Builds the name of a feeddata file for the current run:
#   <dir_feeddata>/<filebegtime>_<feedurl_md5>_<type>
# The feeddata directory is created when it does not exist yet.
sub fileformat_feeddata_begtime {
    my ($self, $type) = @_;

    my $dir_feeddata = $self->dir_feeddata;
    if (!-d $dir_feeddata) {
        $self->do_sys_cmd("mkdir -p $dir_feeddata");
    }

    my $begtime = $self->filebegtime;
    my $feedurl_md5 = $self->feedurl_md5;
    return "$dir_feeddata/${begtime}_${feedurl_md5}_$type";
}

# Returns the result of Utils::Urls::is_datacamp_feed_url for the task's Resource.FeedUrl.
sub is_datacamp_feedurl {
    my ($self) = @_;
    my $feed_url = $self->taskinf->{Resource}{FeedUrl};
    return Utils::Urls::is_datacamp_feed_url($feed_url);
}

# Returns the value of the CGI parameter $param of the task's feed url
# (via Utils::Urls::get_cgi_url_param).
sub get_datacamp_feedurl_param {
    my ($self, $param) = @_;
    my $feed_url = $self->feedurl;
    return Utils::Urls::get_cgi_url_param($feed_url, $param);
}

# Returns the FeedUrl field of the task's Resource.
sub feedurl :CACHE {
    my ($self) = @_;
    my $feed_url = $self->taskinf->{Resource}{FeedUrl};
    return $feed_url;
}

# Returns the task's BusinessID field (a false value when it is not set).
sub datacamp_business_id : CACHE {
    my ($self) = @_;
    my $business_id = $self->taskinf->{BusinessID};
    return $business_id;
}

# Returns the task's ShopID field (a false value when it is not set).
sub datacamp_shop_id : CACHE {
    my ($self) = @_;
    my $shop_id = $self->taskinf->{ShopID};
    return $shop_id;
}

# Returns the "site" parameter of the feed url for tasks with a datacamp business id;
# returns nothing when the task has no business id.
sub datacamp_site : CACHE {
    my ($self) = @_;
    if ($self->datacamp_business_id) {
        return $self->get_datacamp_feedurl_param("site");
    }
    return;
}

# Returns the "url" parameter of a datacamp feed url (the original feed url);
# returns nothing when the task's feed url is not a datacamp one.
# NOTE(review): a trailing `return $self->feedurl' used to follow here, but it could never
# be reached; if non-datacamp tasks were meant to get their plain feed url, that needs
# a deliberate behaviour change.
sub datacamp_orig_feedurl : CACHE {
    my ($self) = @_;
    return unless $self->is_datacamp_feedurl;
    return $self->get_datacamp_feedurl_param("url");
}

# Returns the domain of the feed.
# For an ordinary feed url: the domain of the url itself.
# For a datacamp feed url: the domain of its "url" (or, failing that, "site") parameter;
# "market.feed" when neither parameter is set.
sub feed_or_datacamp_domain : CACHE {
    my ($self) = @_;

    unless ($self->is_datacamp_feedurl) {
        return $self->proj->page($self->feedurl)->domain;
    }

    my $orig_url = $self->get_datacamp_feedurl_param("url") || $self->get_datacamp_feedurl_param("site");
    return "market.feed" if !$orig_url;
    return $self->proj->page($orig_url)->domain;
}

# Returns md5int of the task's feed url; used in the names of feeddata files.
sub feedurl_md5 :CACHE {
    my ($self) = @_;
    return md5int( $self->feedurl );
}

# Returns the task's OrderID with leading and trailing whitespace removed
# ('' when the field is empty or not set).
sub OrderID : CACHE {
    my ($self) = @_;
    my $raw_order_id = $self->taskinf->{OrderID} || '';
    (my $trimmed_order_id = $raw_order_id) =~ s/^\s+|\s+$//g;
    return $trimmed_order_id;
}

# Returns the result of get_value_from_list_field for the 'GroupExportIDs' field.
sub GroupExportID :CACHE {
    my ($self) = @_;
    return $self->get_value_from_list_field('GroupExportIDs');
}

# Creates a feed object that reads offers from an external offers_tskv file.
# Parameters:
#   $filename  -  the external file
#   %add_par   -  extra feed parameters; they override the ones set here
# The task filters are applied; login, pass and business_type are added when the task has them.
sub get_feed_by_extfile {
    my ($self, $filename, %add_par) = @_;
    my $proj = $self->proj;
    my $filters = $self->filters;
    my $task = $self->taskinf;

    my %feedparams = (
        extfile  => $filename,
        filters  => $filters,
        datatype => 'offers_tskv',
    );
    if ($task->{Resource}{Login}) {
        $feedparams{login} = $task->{Resource}{Login};
    }
    if ($task->{Resource}{Password}) {
        $feedparams{pass} = $task->{Resource}{Password};
    }
    if ($self->get_business_type) {
        $feedparams{business_type} = $self->get_business_type;
    }
    %feedparams = (%feedparams, %add_par);

    return $proj->feed(\%feedparams);
}

# Same as get_feed_by_extfile, with mapping_done => 1 appended to the parameters.
sub get_feed_by_tskv_mpd {
    my ($self, @extfile_args) = @_;
    return $self->get_feed_by_extfile(@extfile_args, mapping_done => 1);
}

# Removes repeated phrases from offers that come from an external source.
# An offer is identified by all of its fields except url and the phrases field; a phrase
# already seen for the same identity is dropped from later offers. An offer that is left
# without phrases is dropped entirely unless it has a non-empty title field.
# Offers with zero or several phrases fields are passed through untouched (with a warning).
# Parameters:
#   $offers  -  arrayref of offer hashes (not modified; shallow copies are returned)
# Returns: list of offer hashrefs.
sub filter_external_offers {
    my ($self, $offers) = @_;

    my @filtered;
    my %is_seen_phrase;   # offer identity => phrase => counter
    OFFER: for my $orig_offer (@$offers) {
        my %offer = %$orig_offer;
        my @phrases_fields = grep { /phrases/ } keys %offer;

        if (@phrases_fields == 1) {
            my ($phrases_field) = @phrases_fields;

            my @identity_fields = grep {$_ !~ /(^url$|phrases)/} sort keys %offer;
            my $offer_identity = join("\t", map {$_ .'='.$offer{$_} } @identity_fields);

            my @new_phrases = grep { !$is_seen_phrase{$offer_identity}{$_}++ } split /,/, $offer{$phrases_field};

            if (!@new_phrases) {
                my $titles = join("", map { "$offer{$_}" } grep { /title/ } keys %offer);
                next OFFER if !$titles;
            }
            $offer{$phrases_field} = join(',', @new_phrases);
        } else {
            $self->log("WARN: offers from external source have zero or more than one phrases fields, duplicate removal wont be used");
        }

        push @filtered, \%offer;
    }
    return @filtered;
}

# Filters that must be applied to the result.
# Returns the task's Resource->Targets structure, or an empty hashref (with a
# warning) when the task has none. For datacamp sites every target
# additionally gets "site_disable_status NOT" => "true"; note that this is
# written into the Targets structure stored in taskinf itself.
sub filters :CACHE {
    my ($self) = @_;
    my $targdata = $self->taskinf->{Resource}{Targets};    # feed filters

    if (!$targdata) {    # no filter data at all
        $self->proj->log("WARN: Empty filters");
        return {};
    }

    # Offers of arbitrary internet sites coming from datacamp: add a filter on disable status.
    if ($self->datacamp_site) {
        for my $target_key (keys %$targdata) {
            $targdata->{$target_key}{"site_disable_status NOT"} = "true";
        }
    }

    $self->proj->log("filter: ", $targdata);
    return $targdata;
}

# Downloads all task rows from a YT table and prepares them for processing.
# Params:
#   $table - YT table path; defaults to $self->get_default_input_tasks_yt_table()
# Returns: arrayref of task hashes. Every task gets BSTaskID / GenerationID
#   (taken from the table attributes 'task_id' / 'generation_id') and
#   BannerlandBeginTime; its JSON-encoded fields are decoded, GroupExportIDs
#   is normalised to a one-element list and the parent fields are reordered
#   so that BannerIDs is ascending.
# Dies when the table yields no rows. A row whose JSON fields cannot be
# decoded is logged as an error and skipped.
sub get_all_tasks_from_yt_table {
    my $self = shift;
    my $table = shift // $self->get_default_input_tasks_yt_table();
    my $proj = $self->proj;
    my $yt_client = $proj->yt_client;
    my $begin_time = time();

    my $BSTaskID = $yt_client->get_attribute($table, 'task_id');
    my $GenerationID = $yt_client->get_attribute($table, 'generation_id');

    my @parent_fields = qw(BannerIDs TemplateIDs ParentExportIDs);
    my @json_fields = (@parent_fields, qw(Resource));

    $proj->log("Starting to download YT-table with all tasks");
    $table = $table."{OrderID,GroupingID,CounterID,ClientID,GroupExportID,ShopID,BusinessID,Resource,BannerIDs,ParentExportIDs,TemplateIDs,GroupExportIDs,ParentBannerId,CampaignId,AllowedInProductGallery,DirectFeedID}";
    my $task_rows = $yt_client->read_table($table, "'<encode_utf8=%false>json'");
    $proj->log("Finishing to download YT-table with all tasks");
    if (!$task_rows or !@$task_rows) {
        die "Can't get perf tasks from yt!";
    }

    my $tasks = [];
    my $tasks_counter = 0;
    for my $task (@$task_rows) {
        if ($tasks_counter % 1000 == 0) {
            $proj->log("$tasks_counter has been handled in 'get_all_tasks_from_yt_table'.");
        }
        $tasks_counter += 1;

        $task->{BSTaskID} = $BSTaskID;
        $task->{GenerationID} = $GenerationID;
        $task->{BannerlandBeginTime} = $begin_time;

        # All JSON decoding happens inside one eval and its outcome is checked
        # right away: $@ is a global and must not be inspected after other code
        # has run, and a broken GroupExportIDs must skip the row, not abort the load.
        my $decoded_ok = eval {
            $task->{$_} = $proj->json_obj->decode($task->{$_}) for @json_fields;

            # GroupExportIDs becomes deprecated and will disappear from new task tables
            if (!$task->{GroupExportID}) {
                my $group_ids = $proj->json_obj->decode($task->{GroupExportIDs});
                $task->{GroupExportID} = $group_ids->[0];
            }
            1;
        };
        if (!$decoded_ok) {
            my $error = $@;
            $proj->logger->error("ERROR: JSON parse error: $error", $task);
            next;
        }
        $task->{GroupExportIDs} = [$task->{GroupExportID}];

        # The order of BannerIDs does not matter; only the positional correspondence
        # BannerID <-> ParentExportID <-> TemplateID does.
        # Sort by BannerIDs to get a deterministic order.
        my @BannerIDs = @{$task->{BannerIDs}};
        my @idx = sort { $BannerIDs[$a] <=> $BannerIDs[$b] } 0 .. $#BannerIDs;
        for my $f (@parent_fields) {
            $task->{$f} = [ map { $task->{$f}[$_] } @idx ];
        }

        push @$tasks, $task;
    }
    return $tasks;
}

# Returns 1 when the task is blocked through the project options, either by
# its task id (blocked_task_ids) or by its order id (blocked_order_ids);
# returns 0 otherwise.
sub is_blocked_task {
    my ($self) = @_;
    my $blocked =
           $self->proj->options->{blocked_task_ids}->{$self->task_id}
        || $self->proj->options->{blocked_order_ids}->{$self->OrderID};
    return $blocked ? 1 : 0;
}

# Abstract placeholder: logs an error and returns an empty string.
# Presumably overridden by concrete task classes.
sub get_task_type {
    my ($self) = @_;
    $self->log("ERROR: called abstract function get_task_type");
    return '';
}

# Abstract placeholder: always dies. Callers in this file use the result as a
# hashref keyed by field name, so an override is expected to return one.
sub get_optional_fields {
    my ($self) = @_;
    die("ERROR: called abstract function get_optional_fields");
}

# Abstract placeholder: always dies. Callers in this file use the result as an
# arrayref of { name => ..., type => ... } hashes, so an override is expected
# to return one.
sub get_result_fields {
    my ($self) = @_;
    die("ERROR: called abstract function get_result_fields");
}

# Selects the "bad" flags among the given text flags.
# Params:
#   @textflags - flags to check
# Returns: list (in no particular order) of flags that belong to a matched
#   bad-flag rule. The rules are the keys of the $self->badflags hashref; a key
#   may combine several flags with '&' (logical AND), and such a rule matches
#   only when every one of its flags is present in @textflags.
sub grep_is_bad_flags {
    my ($self, @textflags) = @_;

    # Explicit dereference: `keys` on a bare hashref expression was an
    # experimental feature and is a compile error since Perl 5.24.
    my @all_badflags = map { [ split '&', $_ ] } keys %{ $self->badflags };

    my %flags = map { $_ => 1 } @textflags;
    my %res_badflags = ();
    for my $bad_flags (@all_badflags) {
        # The rule fires only if all of its flags are present.
        if (scalar(grep { $flags{$_} } @$bad_flags) == scalar @$bad_flags) {
            $res_badflags{$_} = 1 foreach @$bad_flags;
        }
    }

    my @res_badflags = keys %res_badflags;
    return @res_badflags;
}

# Determines the currency from the country when the geo targeting names
# exactly one country.
# Returns undef when there is not exactly one target country or when that
# country is absent from the strict_geo_currency option; otherwise returns
# one key of that country's 'currencies' hash.
sub get_region_currency {
    my ($self) = @_;
    my $proj = $self->proj;
    my $country_ids = $self->get_geo_countries;
    return undef if scalar(@$country_ids) != 1;

    my ($country_id) = @$country_ids;
    my $strict_currencies = $proj->options->{strict_geo_currency};
    return undef unless $strict_currencies->{$country_id};

    # First key of the hash; which one it is when there are several is not defined.
    my ($currency_id) = keys %{$strict_currencies->{$country_id}{currencies}};
    return $currency_id;
}

# Abstract placeholder: always dies. _get_priority_order_hash splits the
# result into single characters, so an override is expected to return a string.
sub get_priority_order {
    my ($self) = @_;
    die("get_priority_order called from abstract task");
}

# Maps every character of get_priority_order to its zero-based position in
# that string. For a repeated character the last position wins.
# Returns a hashref { character => position }.
sub _get_priority_order_hash :CACHE {
    my ($self) = @_;
    my $order = $self->get_priority_order;

    my %position_of;
    my $position = 0;
    for my $letter (split //, $order) {
        $position_of{$letter} = $position++;
    }
    return \%position_of;
}

# Sorts the input array by the 'letter' key according to priority_order.
# Letters missing from priority_order are placed at the end, in ascending ord.
# Params:
#   $phrases - arrayref of hashes, each with a 'letter' key
# Returns: new arrayref with the same elements in sorted order.
sub sort_phrases_by_priority_order {
    my ($self, $phrases) = @_;
    my $order_hash = $self->_get_priority_order_hash;

    # Number of prioritised letters. It must be the key COUNT (not the length
    # of that count as a string), so that every unknown letter ranks after
    # all known ones regardless of its ord.
    my $max_ind = scalar(keys %$order_hash);

    my $rank_of = sub {
        my ($letter) = @_;
        return $order_hash->{$letter} // ($max_ind + ord($letter));
    };

    return [ sort { $rank_of->($a->{letter}) <=> $rank_of->($b->{letter}) } @$phrases ];
}

# Validates a banner-phrase hash: every field present in it must be defined
# unless it is listed (with a true value) in get_optional_fields.
# Params:
#   $bp - hashref of banner-phrase fields
# Dies with the list of offending field names when any mandatory field is undef.
sub check_banner_phrase_fields {
    my ($self, $bp) = @_;
    my $optional_fields = $self->get_optional_fields();

    my @undef_fields =
        grep { !$optional_fields->{$_} && !defined($bp->{$_}) } keys %$bp;

    if (scalar(@undef_fields)) {
        die "Error: " . join(", ", @undef_fields) . " - fields must be defined!";
    }
}

# Converts defined values of integer-typed result fields to numbers, in place.
# Params:
#   $bp - hashref of result values; modified in place
# A field is treated as integer when its type from get_result_fields
# (a list of { name => ..., type => ... }) matches /int/. Fields that are not
# described in get_result_fields are left untouched.
sub cast_result_values {
    my ($self, $bp) = @_;
    my $result_fields = $self->get_result_fields();
    my $result_fields_types = { map { $_->{'name'} => $_->{'type'} } @$result_fields };

    for my $field (keys %$bp) {
        next unless defined($bp->{$field});

        # Unknown fields have no type; default to '' so the match does not
        # raise an "uninitialized value" warning for each of them.
        my $type = $result_fields_types->{$field} // '';
        if ($type =~ m/int/) {
            $bp->{$field} += 0;
        }
    }
}

# Translates a comma-separated list of field names into the names returned by
# $feed->fds->get_origin_field_name for each of them.
# Params:
#   $feed               - feed object providing ->fds
#   $use_as_from_direct - comma-separated field names (spaces around commas are ignored)
# Returns: the translated names joined with ','.
sub fill_use_as_field {
    my ($self, $feed, $use_as_from_direct) = @_;

    my @origin_names;
    for my $field_name (split /\s*,\s*/, $use_as_from_direct) {
        push @origin_names, $feed->fds->get_origin_field_name($field_name);
    }
    return join(',', @origin_names);
}

1;
