#!/usr/bin/perl

=head1 NAME

$Id: $

=head1 DESCRIPTION

    Конвертация настроек АДФ/ДРФ в настройки объединенного авторасширения фраз
    Выставляется ограничение на расход бюджета (broad_match_limit) на основании статистики по расходу за последние 7 дней
    Логика определения выставляемого значения:
        По статистике за 7 дней вычисляем процент расхода бюджета по автоматически добавленным фразам относительно общего расхода на кампанию.
        Если статистики нет или рассчитанное значение процента меньше 5% - принимаем его равным 5%.
        Затем
         - если для кампании включен ДРФ в режиме отличном от "minimal", значение broad_match_limit больше нуля и меньше расчетного - сохраняем пользовательское значение;
         - в противном случае выставляем расчетное значение.
        

=head1 RUNNING

    ./protected/one-shot/convert_adf_drf.pl --shard-id N --workers M
    ./protected/one-shot/convert_adf_drf.pl --shard-id N --cid 12345,12346,12347
    ./protected/one-shot/convert_adf_drf.pl --shard-id N --dry-run --to-file /tmp/new_limits.tsv

=cut


use Direct::Modern;
use my_inc "../..";

use Fcntl qw/:flock SEEK_END/;
use POSIX qw/ceil/;
use Parallel::ForkManager;
use Time::HiRes qw/gettimeofday tv_interval/;

use Yandex::DBTools;
use Yandex::DateTime;

use Settings;
use ScriptHelper 
    sharded => 1, 
    'Yandex::Log' => 'messages';
use Campaign::RelatedKeywordsBudgets;

# Maximum number of campaigns fetched from the database per query (one iteration).
my $MAX_CAMPAIGNS = 10000;
# Number of campaigns handed to a single forked worker.
my $CAMPAIGNS_PER_WORKER = 300;
# Number of parallel workers used when --workers is not given.
my $WORKERS_DEFAULT = 8;
# Budget share (percent) applied when there is not enough data to calculate one.
my $DEFAULT_PERCENT = 5;
# Length of the statistics period, in days.
my $STAT_PERIOD = 7;

my ($cids_string, $workers_count, $dry_run, $to_file);
extract_script_params(
    'cid=s' => \$cids_string,
    'workers=i' => \$workers_count,
    'dry-run' => \$dry_run,
    'to-file=s' => \$to_file,
);

$workers_count ||= $WORKERS_DEFAULT;

$log->out("Starting - workers: $workers_count", $cids_string ? ", cids: $cids_string" : '');

# Remember when the script started; workers report their timings relative to it.
my $START_TIME = [gettimeofday];

# Boundaries of the statistics period as date strings (produced by ymd('-')).
my ($FROM, $TO) = map {$_->ymd('-')} (now() - duration($STAT_PERIOD.'d'), now());

my $pm = Parallel::ForkManager->new($workers_count);

my $last_cid;
my $total = 0;

# Only purely numeric tokens are accepted as campaign ids.
my @cid_tokens = split /\s*,\s*/, ($cids_string // '');
my @cids = grep {/^\d+$/} @cid_tokens;
if (defined $cids_string) {
    if (my @ignored = grep {!/^\d+$/} @cid_tokens) {
        $log->out('[warning] ignoring invalid --cid values: '.join(', ', map {"'$_'"} @ignored));
    }
    # An empty id list means "no cid filter" for get_campaigns(), so without this
    # guard a mistyped --cid would silently convert every campaign in the shard.
    die "--cid was given but contains no valid campaign ids: '$cids_string'\n" unless @cids;
}

# Page through the campaigns by cid and hand every page to workers in chunks.
while (my $campaigns = get_campaigns($last_cid, \@cids)){
    $last_cid = $campaigns->[-1]->{cid} if @$campaigns;

    while(my @chunk = splice @$campaigns, 0, $CAMPAIGNS_PER_WORKER) {
        $total += @chunk;
        $log->out('[total] '.$total);
        # Parent: start() returns the child pid, so move on to the next chunk.
        $pm->start and next;
        # Child: convert the chunk; a failure is logged and the child still exits normally.
        eval {convert(\@chunk); 1} or $log->out('[ERROR] '.$@);

        $pm->finish; # Terminates the child process
    }
}
$pm->wait_all_children;

$log->out(sprintf('script finished at time %.3f sec, %s campaigns processed', tv_interval($START_TIME,[gettimeofday]), $total));

$log->out('FINISH');

exit 0;



# Fetches the next page of campaigns that still have to be converted.
#
# Parameters:
#   $last_cid  - only campaigns with cid greater than this value are returned
#                (undef is treated as 0);
#   $only_cids - array ref of campaign ids to restrict the selection to;
#                an empty array means no restriction.
#
# Selected are non-archived, non-wallet campaigns that either have
# is_related_keywords_enabled = 1 or have broad_match_flag = 'Yes' with a
# broad_match_rate other than 'unused'. At most $MAX_CAMPAIGNS rows are
# returned, ordered by cid.
#
# Returns an array ref of row hashes, or nothing (bare return) when no
# campaigns are left.
sub get_campaigns {
    my ($last_cid, $only_cids) = @_;

    $last_cid = 0 unless defined $last_cid;

    $log->out('Fetching campaigns for conversion');

    my %where = (
        'c.cid__gt' => $last_cid,
        'c.type__ne' => 'wallet',
        'c.archived' => 'No',
        _OR => {
            'co.is_related_keywords_enabled' => 1,
            _AND => {'co.broad_match_flag' => 'Yes', 'co.broad_match_rate__ne' => 'unused'},
        },
    );
    # Restrict to the explicitly requested campaigns, if there are any.
    $where{'c.cid'} = $only_cids if @$only_cids;

    my $select = q/SELECT
                                c.cid,
                                c.OrderID,
                                c.AgencyUID,
                                co.is_related_keywords_enabled,
                                co.broad_match_flag,
                                co.broad_match_limit,
                                co.broad_match_rate
                            FROM campaigns c JOIN camp_options co ON (c.cid=co.cid)/;

    my $rows = get_all_sql(
        PPC(shard => $SHARD),
        [ $select, WHERE => \%where, 'ORDER BY' => 'cid', LIMIT => $MAX_CAMPAIGNS ]
    );

    my $found = scalar @$rows;
    if ($found) {
        $log->out("Got $found campaigns (cid > $last_cid)");
        return $rows;
    }

    # Nothing left: a false value ends the caller's paging loop.
    return;
}

# Calculates the broad_match_limit value to store for one campaign.
#
# Parameters:
#   $camp         - campaign row (reads broad_match_flag, broad_match_limit,
#                   broad_match_rate);
#   $stat_percent - share of spending taken from statistics, may be undef.
#
# Returns the new limit as an integer (rounded up).
#
# NOTE(review): the POD of this script describes keeping the user's limit only
# when the rate is not 'minimal' AND the limit is below the calculated value;
# the code keeps it when the rate is not 'minimal' OR the limit is below the
# calculated value. The code's behaviour is preserved here - confirm which one
# is intended.
sub _calc_budget_limit {
    my ($camp, $stat_percent) = @_;

    my $is_drf = ($camp->{broad_match_flag} // '') eq 'Yes';
    # The limit may be undef; treat that as "no user limit" without warnings.
    my $user_limit = $camp->{broad_match_limit} // 0;

    # No statistics (or a zero share) falls back to the default percent.
    my $percent = $stat_percent || $DEFAULT_PERCENT;
    # Campaigns without broad_match_flag = 'Yes' never get a limit above 80%.
    $percent = 80 if !$is_drf && $percent > 80;
    # The calculated value is never lower than the default percent.
    $percent = $DEFAULT_PERCENT if $percent < $DEFAULT_PERCENT;

    if ($is_drf && $user_limit > 0
            && (($camp->{broad_match_rate} // '') ne 'minimal' || $percent > $user_limit)) {
        $percent = $user_limit;
    }

    return ceil($percent);
}

# Converts one chunk of campaigns; runs inside a worker process.
#
# For every campaign the spending statistics for the period $FROM..$TO are
# requested (only when the campaign has a positive OrderID), the new
# broad_match_limit is calculated and a stat row is collected. Campaigns whose
# statistics request dies are logged and skipped. Afterwards the stat rows are
# appended to the per-shard stat file, the new limits are appended to
# $to_file (if given) and, unless --dry-run is set, written to the database
# together with broad_match_rate = 'unused', broad_match_flag = 'Yes' and
# LastChange = Now().
#
# Parameters:
#   $campaigns - array ref of campaign rows as returned by get_campaigns().
#
# Returns nothing.
sub convert {
    my ($campaigns) = @_;

    my $start_camp = [gettimeofday];

    # Identify the worker by pid, the same way the closing message below does.
    $log->out(
        sprintf('Running new worker [%s] for %s campaigns, start_time %.3f sec', $$, scalar @$campaigns, tv_interval($START_TIME,$start_camp))
    );

    my $update_data = {};
    my $stat_data = [];
    for my $camp (@$campaigns){
        my $order_id = $camp->{OrderID} // 0;
        my $is_drf = ($camp->{broad_match_flag} // '') eq 'Yes';

        # Campaigns without an order have no statistics to ask for.
        my $stat = {};
        if ($order_id > 0) {
            my $budgets;
            unless (eval {$budgets = Campaign::RelatedKeywordsBudgets::get_related_keywords_budgets(from => $FROM, to => $TO, order_ids => [$order_id]); 1}) {
                $log->out(sprintf('[error] cid: %s text: %s', $camp->{cid}, $@));
                next;
            }
            $stat = $budgets->{$order_id} if ref $budgets eq 'HASH' && ref $budgets->{$order_id} eq 'HASH';
        }

        my $new_limit = _calc_budget_limit($camp, $stat->{percent});

        push @$stat_data, {
            cid => $camp->{cid},
            order_id => $order_id,
            agency => $camp->{AgencyUID} // 0,
            # Missing statistics are reported as zeroes instead of undef.
            total  => $stat->{total} // 0,
            sum => $stat->{sum} // 0,
            adf => $camp->{is_related_keywords_enabled},
            drf => $is_drf ? 1 : 0,
            limit => $camp->{broad_match_limit} // 0,
            new_limit => $new_limit,
            bm_rate => $camp->{broad_match_rate},
        };

        $update_data->{$camp->{cid}} = {'co.broad_match_limit' => $new_limit};
        $log->out(
            "converted - cid: $camp->{cid} order_id: $order_id percent: ".($stat->{percent} // 'undef')." budget_limit: $new_limit".(
                $is_drf ? ' broad_match_limit: '.($camp->{broad_match_limit} // 'undef').' broad_match_rate: '.$camp->{broad_match_rate} : ''
            )
        );
    }

    write_stat('stat_'.$SHARD, $stat_data);
    write_to_file($to_file, $update_data) if defined $to_file;

    # Nothing to write when every campaign of the chunk was skipped.
    if (!$dry_run && %$update_data) {
        do_mass_update_sql(PPC(shard => $SHARD), 'camp_options co JOIN campaigns c ON (c.cid = co.cid)',
            'co.cid' => $update_data,
            byfield_options => {
                'c.LastChange' => {default__dont_quote => 'Now()'},
                'co.broad_match_rate' => {default => 'unused'},
                'co.broad_match_flag' => {default => 'Yes'},
            }
        );
    }

    $log->out(
        sprintf('[%s] Processing of %s campaigns took %.3f sec', $$, scalar @$campaigns, tv_interval($start_camp, [gettimeofday]))
    );

    return;
}

# Appends the new limits to a file, one "cid<TAB>limit" line per campaign.
#
# The file is opened in append mode and locked exclusively for the duration
# of the write, so that several workers can write to the same file. Lines are
# written in ascending cid order.
#
# Parameters:
#   $filename - path of the file to append to;
#   $data     - hash ref: cid => { 'co.broad_match_limit' => <limit> }.
#
# Dies when the file cannot be opened, locked, written or closed.
# Returns nothing.
sub write_to_file {
    my ($filename, $data) = @_;

    open(my $fh, '>>', $filename) or die "Couldn't open $filename: $!";
    flock($fh, LOCK_EX) or die "Couldn't lock $filename: $!";
    seek($fh, 0, SEEK_END) or die "Couldn't seek - $!";
    for my $cid (sort { $a <=> $b } keys %$data) {
        print {$fh} "$cid\t$data->{$cid}->{'co.broad_match_limit'}\n"
            or die "Couldn't write to $filename: $!";
    }
    # Buffered write errors only show up on close; closing also releases the lock.
    close($fh) or die "Couldn't close $filename: $!";

    return;
}

# Appends statistics rows to a file, one JSON-formatted line per campaign.
#
# The file is opened in append mode and locked exclusively for the duration
# of the write, so that several workers can write to the same file. Nothing
# is done (the file is not even opened) when there are no rows.
#
# Parameters:
#   $filename - path of the file to append to;
#   $data     - array ref of hash refs with the keys adf, drf, limit,
#               new_limit, bm_rate, agency, cid, order_id, total, sum.
#               Undefined numeric fields are written as 0, an undefined
#               bm_rate as an empty string.
#
# Dies when the file cannot be opened, locked, written or closed.
# Returns nothing.
sub write_stat {
    my ($filename, $data) = @_;

    return unless @$data;

    open(my $fh, '>>', $filename) or die "Couldn't open $filename: $!";
    flock($fh, LOCK_EX) or die "Couldn't lock $filename: $!";
    seek($fh, 0, SEEK_END) or die "Couldn't seek - $!";
    foreach my $row (@$data) {
        # Campaigns without statistics have no total/sum; default them here
        # so that sprintf does not warn about uninitialized values.
        my $str = sprintf '{"adf":%d,"drf":%d,"limit":%d,"bm_share":%d,"bm_rate":"%s","agency":%d,"cid":%d,"OrderID":%d,"total":%.4f,"sum":%.4f}',
            (map { $row->{$_} // 0 } qw/adf drf limit new_limit/),
            ($row->{bm_rate} // ''),
            (map { $row->{$_} // 0 } qw/agency cid order_id total sum/);
        print {$fh} "$str\n" or die "Couldn't write to $filename: $!";
    }
    # Buffered write errors only show up on close; closing also releases the lock.
    close($fh) or die "Couldn't close $filename: $!";

    return;
}
