#!/usr/bin/perl

use my_inc "..";

=head1 METADATA

<crontab>
    time: 12 4 * * *
    package: scripts-switchman
    <switchman>
        group:  scripts-other
        <leases>
            mem: 1536
        </leases>
    </switchman>
</crontab>

<juggler>
    host:   checks_auto.direct.yandex.ru
    ttl:            2d6h
    tag: direct_group_internal_systems
</juggler>

=cut

use warnings;
use strict;
use utf8;

####################################
#
#   Вычисление CTR для всех баннеров, сохранение во временную таблицу
#
#   $Id$
#
####################################

use Yandex::DBTools;

use Settings;
use ScriptHelper 'Yandex::Log' => 'messages';

use HashingTools;
use ShardingTools;

use open ':std' => ':utf8';

# Minimum number of shows / pshows a phrase must have before its ctr / pctr
# is taken into account; both values are interpolated into the SQL query as
# thresholds.
my ($show_min, $pshow_min) = (200, 200);

$log->out('START');

# Accumulator keyed by normalized phrase:
#   norm_phrase -> [sum(ctr), count(ctr), sum(pctr), count(pctr)]
my %STAT;

# Walk every PPC shard and fold per-phrase ctr / pctr values into %STAT.
for my $shard (ppc_shards()) {
    $log->msg_prefix("[shard $shard]");
    $log->out('Execute query');

    # mysql_use_result is a DBD::mysql attribute; per its documentation it
    # makes the driver stream rows from the server instead of buffering the
    # whole result set on the client.
    my $dbh = get_dbh(PPC(shard => $shard));
    $dbh->{mysql_use_result} = 1;

    # Columns: norm_phrase, ctr (NULL when shows < $show_min),
    #          pctr (NULL when pshows < $pshow_min).
    my $sth = exec_sql($dbh, "
                          select bi.norm_phrase,
                                 if( auct.shows >= $show_min , auct.clicks/auct.shows  , NULL),
                                 if(auct.pshows >= $pshow_min, auct.pclicks/auct.pshows, NULL)
                            from bs_auction_stat auct
                                 join bids bi on auct.pid = bi.pid and auct.PhraseID = bi.PhraseID
                           where auct.PhraseID > 0 and ( auct.shows >= $show_min or auct.pshows >= $pshow_min )
                       ");

    $log->out('Fetching phrases');

    # Pull the result set in batches of up to 100_000 rows.
    while (my $batch = $sth->fetchall_arrayref(undef, 100_000)) {
        for my $rec (@$batch) {
            my ($phrase, $ctr, $pctr) = @$rec;

            # First sighting of a phrase creates a zeroed accumulator.
            my $acc = $STAT{$phrase} //= [0, 0, 0, 0];

            # A NULL ctr / pctr means the threshold was not reached,
            # so that value is left out of the corresponding average.
            if (defined $ctr) {
                $acc->[0] += $ctr;
                $acc->[1]++;
            }
            if (defined $pctr) {
                $acc->[2] += $pctr;
                $acc->[3]++;
            }
        }
    }
    $sth->finish;
}
$log->msg_prefix(undef);

# Turn the accumulator hash into an array of arrays for the mass insert:
# each row is [phrase hash, average ctr, average pctr].
$log->out('Collecting data for insert');
my @insert_rows;
while (my ($phrase, $acc) = each %STAT) {
    my ($ctr_sum, $ctr_cnt, $pctr_sum, $pctr_cnt) = @$acc;

    my $hash = url_hash_utf8($phrase);

    # An average is undef when no value passed the corresponding threshold.
    my $avg_ctr  = $ctr_cnt  ? $ctr_sum  / $ctr_cnt  : undef;
    my $avg_pctr = $pctr_cnt ? $pctr_sum / $pctr_cnt : undef;

    push @insert_rows, [$hash, $avg_ctr, $avg_pctr];
}
# The accumulator is no longer needed once the rows are built.
%STAT = ();

# Insert the rows into the database, ordered numerically by hash;
# existing hashes get their ctr / pctr overwritten.
$log->out('Inserting');
@insert_rows = sort { $a->[0] <=> $b->[0] } @insert_rows;

my $upsert_sql = "INSERT into forecast_ctr (hash,ctr,pctr)
                                      VALUES %s
                                      ON DUPLICATE KEY UPDATE
                                        ctr=VALUES(ctr),
                                        pctr=VALUES(pctr)";

do_mass_insert_sql(PPCDICT, $upsert_sql, \@insert_rows, { sleep => 1 });
juggler_ok();

$log->out('FINISH');
