#!/usr/bin/perl


=for none

switch the beta to the "roprod" config
enable queryrec
./langs.pl > campaigns_stat
./stat.pl campaigns_stat

=cut

use Direct::Modern;
use my_inc '/var/www/beta.liosha.10220', for => 'protected';

use Settings;
use ScriptHelper;

use JSON;
use List::Util qw/min/;
use Yandex::DBTools;
use Yandex::Retry;
use Yandex::HashUtils;
use ShardingTools;

use Lang::Guess;

# Script tunables.
# $CHUNK_SIZE - width (in cids) of one scan window of the main loop.
# $SLEEP_COEF - value handed to relaxed_guard() as `times`; can be overridden
#               from the command line with --sleep-coef.
my $CHUNK_SIZE = 10_000;
my $SLEEP_COEF = 1;

# Optional bounds of the cid interval to scan (--min-cid / --max-cid);
# both stay undef when the corresponding option is not given.
my ($min_cid, $max_cid);

extract_script_params(
    'sleep-coef=f' => \$SLEEP_COEF,
    'min-cid=i'    => \$min_cid,
    'max-cid=i'    => \$max_cid,
);

$log->out('START');

# Ask the database for the largest cid in the campaigns table.
# NOTE(review): presumably overshard_get_one_line() runs the query on every
# shard and merges the rows keeping the maximum `cid` - confirm against
# ShardingTools.
my $max_cid_row = overshard_get_one_line(
    {group => '', max => 'cid'},
    PPC_HEAVY(shard => 'all'),
    'select max(cid) cid from campaigns'
);
my $last_existing_cid = $max_cid_row->{cid};

# Lower bound defaults to 0 when --min-cid is absent (or zero).
$min_cid ||= 0;

# Upper bound is the smaller of --max-cid and the largest existing cid;
# false values (undef / 0) are ignored, so an unset --max-cid simply falls
# back to the largest existing cid.
# NOTE(review): if both candidates are false, min() gets an empty list and
# $max_cid becomes undef - confirm this cannot happen in practice.
my @upper_candidates = grep {$_} ($max_cid, $last_existing_cid);
$max_cid = min @upper_candidates;

$log->out("going to scan campaigns from $min_cid to $max_cid");

# Walk the cid interval [$min_cid, $max_cid] in windows of at most
# $CHUNK_SIZE ids.  For every window: fetch the matching banners, guess the
# language of each banner text, and print one JSON line per campaign holding
# the campaign fields plus a {lang => count} map.
my $chunk_start = $min_cid;
while ($chunk_start <= $max_cid) {
    # The guard object lives until the end of this iteration.
    # NOTE(review): presumably it pauses on destruction in proportion to the
    # time the iteration took, scaled by `times` - confirm in Yandex::Retry.
    my $throttle = relaxed_guard(times => $SLEEP_COEF);

    my $chunk_end = min($max_cid, $chunk_start + $CHUNK_SIZE - 1);
    $log->out("processing cid range $chunk_start .. $chunk_end");

    # Filter: banners of the listed types that belong to campaigns of the
    # listed types whose statusModerate is not "New", within this window.
    my %where = (
        cid__between => [$chunk_start, $chunk_end],
        banner_type => [qw/text mobile_content dynamic/],
        'c.statusModerate__ne' => "New",
        'c.type' => [qw/text geo mobile_content dynamic/],
    );

    # For dynamic campaigns only the body is analysed; otherwise the text is
    # title and body joined by a space.
    my $rows = get_all_sql(PPC(shard=>'all'), [
            'SELECT cid, bid, uid, c.type,
                IF(c.type="dynamic", body, concat(title, " ", body)) as text,
                c.statusActive="Yes" as is_active
            FROM campaigns c
            JOIN banners b USING(cid)',
            WHERE => \%where,
        ]);

    my $rows_count = @$rows;
    $log->out(sprintf('got %d banners', $rows_count));

    # cid => { cid, type, uid, is_active, lang => { <language> => <count> } }
    my %stat_by_cid;
    for my $row (@$rows) {
        my $cid = $row->{cid};

        # The first banner seen for a campaign seeds its record.
        $stat_by_cid{$cid} ||= hash_cut($row, qw/cid type uid is_active/);

        # '??' stands for "language could not be determined".
        my $lang = analyze_text_lang($row->{text}) || '??';
        $stat_by_cid{$cid}->{lang}->{$lang}++;
    }

    my $campaigns_count = keys %stat_by_cid;
    $log->out(sprintf('writing stat for %d campaigns', $campaigns_count));

    # Emit in ascending cid order; canonical JSON keeps key order stable.
    my @ordered_cids = sort { $a <=> $b } keys %stat_by_cid;
    for my $cid (@ordered_cids) {
        say to_json($stat_by_cid{$cid}, {canonical => 1});
    }

    $chunk_start = $chunk_end + 1;
}

$log->out('FINISH');

