#!/usr/bin/perl

use my_inc '..';

=head1 DEPLOY

# approved by hrustyashko
# .migr
{
    type => 'script',
    when => 'after',
    time_estimate => "60 мин",
    comment => "запускать без параметров, можно добавить --sleep-coef для пауз между апдейтами (по умолчанию спим столько же сколько апдейтим)"
}

=cut

use warnings;
use strict;
use utf8;

use FindBin qw/$Bin/;
use List::Util qw/max/;
use List::MoreUtils qw/uniq/;
use Data::Dumper;

use Yandex::DBTools;
use Yandex::Retry;
use Yandex::ListUtils qw/chunks/;
use Yandex::HashUtils;

use Settings;
use ScriptHelper;
use ShardingTools;

# Time window used when looking up 'saveCamp' commands in the command log:
# main() passes $DATE_FROM as the lower logtime bound (logtime__ge) for
# logcmd_20140710 and $DATE_TO as the upper logtime bound (logtime__le)
# for logcmd_20140711.
our $DATE_FROM = '2014-07-10 14:12:00';
our $DATE_TO   = '2014-07-11 18:30:00';

# --------------------------------------------------------------------
# Restores phrases.geo for groups whose geo is currently NULL/''/0.
#
# Steps:
#   1. read the "pid old_geo" pairs from 20140714_fix_null_geo.data (next to the script);
#   2. walk the pid space in windows of $STEP, shard by shard, selecting phrases
#      rows with an empty geo;
#   3. keep only rows whose campaign has a 'saveCamp' entry in the command log
#      inside the [$DATE_FROM, $DATE_TO] window;
#   4. write the old geo back and reset statusBsSynced, one row at a time,
#      wrapped in relaxed() so the script pauses between updates.
#
# Command line: --sleep-coef N  is passed to relaxed() as "times" (default 1).
sub main {
    $log->out('START');

    my $SLEEP_COEF = 1;

    extract_script_params(
        'sleep-coef' => \$SLEEP_COEF,
    );

    my $old_geos = _load_old_geos("$Bin/20140714_fix_null_geo.data");

    my @SHARDS = ppc_shards();

    my $STEP = 1_000_000;
    my $START_PID = 0;
    # Seeding max() with 0 and dropping undefs keeps $STOP_PID a defined number
    # even if a query against an empty phrases table yields NULL.
    my $STOP_PID = max(0, grep { defined $_ } @{get_one_column_sql(PPC(shard => 'all'), "SELECT max(pid) FROM phrases")});

    # "<=" (not "<"): when max(pid) is an exact multiple of $STEP, the window
    # that starts at max(pid) must still be scanned, otherwise that pid is skipped.
    for (my $start = $START_PID; $start <= $STOP_PID; $start += $STEP) {
        for my $shard (@SHARDS) {
            $log->out("shard: $shard, start: $start");
            my $sth = exec_sql(PPC(shard => $shard), "SELECT pid, cid
                                                      FROM phrases
                                                      WHERE pid >= ?
                                                        AND pid < ?
                                                        AND IFNULL(geo, '') IN ('', 0)
                                                     ", $start, $start + $STEP);
            my %data;
            while (my ($pid, $cid) = $sth->fetchrow_array) {
                $data{$pid} = {cid => $cid};
            }

            next unless %data;

            my @cids = uniq map {$_->{cid}} values %data;
            my $cids_edited = _get_edited_cids(\@cids);

            for my $pid (keys %data) {
                my $cid = $data{$pid}->{cid};
                next unless $cids_edited->{$cid};
                my $old_geo = $old_geos->{$pid};

                # A stored value of '0' is also treated as missing here; it is one of
                # the values the WHERE clause below considers "empty" anyway.
                unless ($old_geo) {
                    $log->out("pid: $pid cid: $cid old_geo not found");
                    next;
                }

                $log->out("pid: $pid cid: $cid old_geo: $old_geo");

                relaxed times => $SLEEP_COEF, sub {
                    do_update_table(PPC(shard => $shard), 'phrases', {
                        # LastChange is assigned to itself (unquoted column name)
                        LastChange__dont_quote => 'LastChange',
                        statusBsSynced => 'No',
                        geo => $old_geo,
                    # the geo condition is repeated so a row that got a geo in the
                    # meantime is not overwritten
                    }, where => {pid => $pid, _AND => {_TEXT => "IFNULL(geo, '') IN ('', 0)"}});
                };
            }
        }
    }

    $log->out('FINISH');
}

# --------------------------------------------------------------------
# Reads the "pid<whitespace>old_geo" dump at $path and returns a hashref
# {pid => old_geo}. Dies if the file cannot be opened.
sub _load_old_geos {
    my ($path) = @_;

    my %old_geos;
    open my $FH, '<', $path or die "Error: can't open $path: $!";
    while (my $line = <$FH>) {
        chomp $line;
        my ($pid, $old_geo) = split /\s+/, $line, 2;
        # blank lines give an undef pid, lines with leading whitespace an empty one
        next unless defined $pid && length $pid;
        $old_geos{$pid} = $old_geo;
    }
    close $FH;

    return \%old_geos;
}

# --------------------------------------------------------------------
# Returns a hashref keyed by cid for those of the given campaigns (arrayref of
# cids) that have 'saveCamp' rows in logcmd_20140710 from $DATE_FROM on, or in
# logcmd_20140711 up to $DATE_TO. Only the presence of a cid matters to the
# caller: counts from the second table replace those from the first.
sub _get_edited_cids {
    my ($cids) = @_;

    my $cids_edited = get_hash_sql(PPCLOG, [
        "select cid, count(*) as cnt
         from logcmd_20140710
        ", where => {
            cid => $cids,
            cmd => 'saveCamp',
            logtime__ge => $DATE_FROM
        }, "group by cid"
    ]);
    hash_merge $cids_edited, get_hash_sql(PPCLOG, [
        "select cid, count(*) as cnt
         from logcmd_20140711
        ", where => {
            cid => $cids,
            cmd => 'saveCamp',
            logtime__le => $DATE_TO
        }, "group by cid"
    ]);

    return $cids_edited;
}

# --------------------------------------------------------------------
# Script entry point: run the migration.
main();
