package Direct::YT::Export::BmAllBanners;

=head1 NAME
    
    Direct::YT::Export::BmAllBanners - экспорт всех баннеров для Бродматча

=head1 SYNOPSIS

    PERL5LIB=protected pyt Direct::YT::Export::BmAllBanners

=cut

use strict;
use warnings;

use JSON;

use Yandex::YT;
use Yandex::YT::Streaming;

use Yandex::StopWords;
# Register the stop words file under the name 'stopwords' in %Yandex::YT::JOB_ADD_FILES
# (presumably the set of files shipped along with a YT job - confirm against Yandex::YT).
$Yandex::YT::JOB_ADD_FILES{'stopwords'} = $Yandex::StopWords::STOP_WORDS_FILE;
# When the configured path is not an existing file, fall back to the relative name 'stopwords'
# (presumably this is the case inside a YT job, where the shipped copy is used - confirm).
$Yandex::StopWords::STOP_WORDS_FILE = 'stopwords' if !-f $Yandex::StopWords::STOP_WORDS_FILE;

# Use JSON as the default streaming format
$Yandex::YT::Streaming::DEFAULT_FORMAT = 'json';

# Rate of our conventional unit (y.e.) in rubles, VAT included
our $CONV_UNIT_RATE = 30; 

# Build a list of column descriptions from "type => [column names]" pairs.
# Every column name becomes a {name => ..., type => ...} hash; the order of
# the pairs and of the names inside each pair is preserved.
# Returns the list of hashes (its length in scalar context).
sub make_schema {
    my @spec = @_;
    my @columns;
    while (@spec) {
        my ($type, $names) = splice @spec, 0, 2;
        for my $name (@$names) {
            push @columns, { name => $name, type => $type };
        }
    }
    return @columns;
}

# Column layout of the bm_banners table (used by create_table in the 'banners' job)
my @banners_schema = make_schema(
    int64 => [qw(
        cid pid bid OrderID BannerID uid
        image_flag is_active is_archived is_camp_archived is_moderated is_show creative_id
    )],
    string => [qw(
        banner_type body domain geo href lang title title_extension
        yacontextCategories phrases image_hash image_ad_hash
    )],
);

# Column layout of the bm_adgroups table (used by create_table in the 'banners' job)
my @adgroups_schema = make_schema(
    int64  => [qw(cid pid)],
    string => [qw(minus_words phrases)],
);

# Declaration of the 'banners' job: builds the bm_banners and bm_adgroups tables
# in $tmp_path from four //home/direct/db tables, sorts them and moves them
# into $export_path.
job 'banners',
    vars_spec => {
        # directory that receives the finished bm_banners / bm_adgroups tables
        export_path => qr/^[\w\/_-]+$/,
        # directory for the intermediate *.new tables
        tmp_path => qr/^[\w\/_-]+$/,

        # for debugging: optional "[from:to]" suffix appended to every source table path
        cids_range => {default => '', check => qr/^(|\[(\d+)?:(\d+)?\])$/},

        # NOTE(review): stat_from_date is validated here but not referenced by any task below
        stat_from_date => qr/^\d{4}-\d{2}-\d{2}/,
        # value stored in the upload_time attribute of the exported tables
        db_upload_time => qr/^\d{4}-\d{2}-\d{2}/,
    },
    tasks => [
        # drop leftovers of a previous run and create empty intermediate tables with explicit schemas
        [remove => '$tmp_path/bm_banners.new', force => 1],
        [remove => '$tmp_path/bm_adgroups.new', force => 1],
        [create_table => '$tmp_path/bm_banners.new',
         schema => \@banners_schema,
        ],
        [create_table => '$tmp_path/bm_adgroups.new',
         schema => \@adgroups_schema,
        ],
        # The order of the source tables matters: the 'export_banners' reducer tells rows apart
        # by the "ti" field filled in by the mapper (0 - campaigns, 1 - phrases, 2 - bids, 3 - banners).
        # The order of the destination tables matters as well: the reducer passes 1 as the second
        # argument of yield for ad group rows (presumably the output table index - confirm).
        [map_reduce => ['materialize_table_index', 'export_banners'],
         src => '
                //home/direct/db/campaigns{cid,OrderID,uid,archived,sum,sum_spent,wallet_sum,wallet_sum_spent,statusShow}$cids_range
                //home/direct/db/phrases{cid,pid,geo,statusPostModerate,mw_text,mobile_store_content_href}$cids_range
                //home/direct/db/bids{cid,pid,id,PhraseID,phrase,price_rub,stat_shows,stat_clicks,stat_pshows,stat_pclicks,is_suspended,statusModerate}$cids_range
                //home/direct/db/banners{cid,pid,bid,BannerID,banner_type,title,title_extension,body,href,domain,statusShow,statusArch,statusPostModerate,image_BannerID,image_id,image_hash,creative_id,image_ad_hash}$cids_range
                ', 
         dst => '
                $tmp_path/bm_banners.new
                $tmp_path/bm_adgroups.new
                ',
         spec => 'mapper={layer_paths=["//porto_layers/yt_packages_from_banach.tar.gz"]};'
                .'reducer={layer_paths=["//porto_layers/yt_packages_from_banach.tar.gz"]};'
                .'scheduling_tag_filter=porto',
        ],
        # banners: sort in place by bid, then replace the previously exported table
        [sort => ['bid'],
         src => '$tmp_path/bm_banners.new',
         dst => '$tmp_path/bm_banners.new',
         spec => 'partition_count=256',
        ],
        [remove => '$export_path/bm_banners', force => 1],
        [move => '',
          src => '$tmp_path/bm_banners.new',
          dst => '$export_path/bm_banners',         
        ],
        # ad groups: sort in place by pid, then replace the previously exported table
        [sort => ['pid'],
         src => '$tmp_path/bm_adgroups.new',
         dst => '$tmp_path/bm_adgroups.new',
         spec => 'partition_count=64',
        ],
        [remove => '$export_path/bm_adgroups', force => 1],
        [move => '',
          src => '$tmp_path/bm_adgroups.new',
          dst => '$export_path/bm_adgroups',
        ],
        # stamp both exported tables with the upload time passed in $db_upload_time
        [set => '$export_path/bm_banners', attr => 'upload_time', value => '$db_upload_time'],
        [set => '$export_path/bm_adgroups', attr => 'upload_time', value => '$db_upload_time'],
    ];

# Copy the table_index stream attribute (number of the table in the request)
# into a regular "ti" field of every row, so that rows can later be sorted
# by it (reduce-side join).
mapper 'materialize_table_index',
    sub {
        my ($stream) = @_;
        for (;;) {
            my $row = $stream->get() or last;
            # the attribute is re-read for every row on purpose
            $row->{ti} = $stream->{_attrs}{table_index};
            $stream->yield($row);
        }
};

=head2 norm($string)

    A kind of string "normalization":
    undef becomes an empty string,
    every run of whitespace characters is collapsed into a single space
    (leading and trailing whitespace is collapsed too, not removed)

=cut
sub norm($) {
    my ($text) = @_;
    return '' unless defined $text;
    return $text =~ s/\s+/ /gr;
}

# Reducer of the 'banners' job.
# Rows are grouped by cid and sorted by (pid, ti) inside a group, where ti is
# the source table number written by the 'materialize_table_index' mapper:
# 0 - campaigns, 1 - phrases (ad groups), 2 - bids, 3 - banners.
# For every banner one or two rows are yielded to the default output, and for
# every ad group that has at least one banner a single row is yielded with
# the second yield argument set to 1.
reducer 'export_banners',
    reduceby => 'cid',
    sortby => ['pid', 'ti'],
    sub {
        my ($s, $vars) = @_;
        while(my $g = $s->get_group()) {
            # The first row of a group must be the campaign row (ti == 0);
            # a group that starts with anything else is skipped entirely.
            my $camp = $s->get();
            if ($camp->{ti} != 0) {
                $s->pass_group();
                next;
            }
            my $is_camp_active = camp_is_active($camp);
            # current ad group; bids and banners are attached to it only when their pid matches
            my $adgroup;
            while(my $r = $s->get()) {
                if ($r->{ti} == 1) {
                    # phrases
                    $adgroup = $r;
                    $adgroup->{phrases} = [];
                } elsif ($r->{ti} == 2 && $adgroup && $r->{pid} == $adgroup->{pid}) {
                    # bids: only moderated ones are exported
                    next unless $r->{statusModerate} eq 'Yes';
                    $r->{phrase} = Yandex::StopWords::process_quoted_phrases($r->{phrase});
                    $r->{is_active} = $r->{is_suspended} ? 0 : 1;
                    # One phrase is serialized as
                    # "phrase:price:stat_shows:stat_clicks:stat_pshows:stat_pclicks:is_active:PhraseID",
                    # the price being converted from rubles with $CONV_UNIT_RATE.
                    # Explicit dereference: push on a scalar expression was removed in Perl 5.24.
                    push @{ $adgroup->{phrases} },
                        join ":", $r->{phrase}, sprintf("%.2f", $r->{price_rub} / $CONV_UNIT_RATE),
                                    map {$_//0} @{$r}{qw/stat_shows stat_clicks stat_pshows stat_pclicks is_active PhraseID/};
                } elsif ($r->{ti} == 3 && $adgroup && $r->{pid} == $adgroup->{pid}) {
                    # banners
                    my $phrases = norm(join ',', @{$adgroup->{phrases}});
                    # the ad group row is emitted once, together with its first banner
                    if (!$adgroup->{_yielded}) {
                        my $minus_words_string = format_minuswords($adgroup->{mw_text});
                        # second argument 1: presumably the output table index (bm_adgroups) - confirm
                        $s->yield({pid => int($adgroup->{pid}), phrases => $phrases, cid => $camp->{cid}, minus_words => $minus_words_string}, 1);
                        $adgroup->{_yielded} = 1;
                    }

                    my %res;
                    # campaign-level fields
                    $res{$_} = $camp->{$_} // '' for qw/cid OrderID uid/;

                    # ad-group-level fields
                    $res{$_} = $adgroup->{$_} // '' for qw/pid/;
                    $res{$_} = norm($adgroup->{$_}) for qw/geo/;
                    $res{phrases} = $phrases;

                    # banner-level fields
                    $res{$_} = norm($r->{$_}) for qw/title title_extension body domain banner_type image_hash image_ad_hash/;
                    # href: the ad group's mobile_store_content_href wins; otherwise the banner href,
                    # prefixed with "http://" when it is non-empty and has no http(s) scheme
                    $res{href} = norm($adgroup->{mobile_store_content_href} ? $adgroup->{mobile_store_content_href} : $r->{href} && $r->{href} !~ /^https?:\/\// ? "http://$r->{href}" : $r->{href});
                    $res{creative_id} = $r->{creative_id};
                    $res{is_active} = $is_camp_active && $r->{BannerID} && $r->{statusShow} eq 'Yes' ? 1 : 0;
                    $res{is_moderated} = $r->{statusPostModerate} eq 'Yes' && $adgroup->{statusPostModerate} eq 'Yes' ? 1 : 0;
                    $res{is_show} = $r->{statusShow} eq 'Yes' ? 1 : 0;
                    $res{is_archived} = $r->{statusArch} eq 'Yes' ? 1 : 0;
                    $res{is_camp_archived} = $camp->{archived} eq 'Yes' ? 1 : 0;
                    $res{lang} = 'dummy';
                    $res{yacontextCategories} = ''; # see DIRECT-85640

                    # One row for the banner itself (image_flag = 0) and, when the banner has an
                    # image, one more row (image_flag = 1) with image_id / image_BannerID placed
                    # into bid / BannerID.
                    for my $image_flag (0, 1) {
                        next if $image_flag && !defined $r->{image_id};
                        $res{image_flag} = $image_flag;
                        ($res{bid}, $res{BannerID}) = @{$r}{$image_flag ? ('image_id', 'image_BannerID') : ('bid', 'BannerID')};
                        $s->yield(\%res);
                    }

                }
            }
        }
    };

=head2 camp_is_active

    Computes the campaign "activity" flag using an algorithm that is specific to Broadmatch:
    the campaign is active (1) when the remaining money
    (sum - sum_spent + wallet_sum - wallet_sum_spent, missing values counted as 0)
    is greater than 1e-7 and statusShow is 'Yes'; otherwise 0 is returned

=cut
sub camp_is_active {
    my ($camp) = @_;

    my $money_left = $camp->{sum} || 0;
    $money_left -= $camp->{sum_spent} || 0;
    $money_left += $camp->{wallet_sum} || 0;
    $money_left -= $camp->{wallet_sum_spent} || 0;

    return 0 unless $money_left > 1e-7;
    return $camp->{statusShow} eq 'Yes' ? 1 : 0;
}


=head2 format_minuswords

    Analog of MinusWordsTools::minus_words_array2str_with_brackets(MinusWordsTools::minus_words_str2array($text))
    Copied here so that we do not have to drag all our Perl dependencies into YT

    Takes a JSON text with an array of minus words and returns them joined with spaces;
    words that contain a space are wrapped in parentheses.
    Returns an empty string when the text is empty, is not valid JSON
    or does not decode to an array.

=cut
sub format_minuswords {
    my ($text) = @_;
    return "" unless $text;

    # best effort: a parse error must not kill the whole job
    my $words = eval { from_json($text) };
    # anything but an array (undef after a parse error, an object, a bare scalar)
    # cannot be dereferenced as a list below, so it is treated as "no minus words"
    return "" unless ref($words) eq 'ARRAY';

    return join " ", map {/\ / ? "($_)" : $_} @$words;
}

1;


