#!/usr/bin/perl

use strict;
use warnings;

use utf8;
use open ':std' => ':utf8';

use FindBin qw/$Bin/;
use lib "$Bin/../protected";

use Settings;
use Yandex::DBTools;
use Yandex::HashUtils;
use ScriptHelper;
use List::MoreUtils qw/uniq part/;
$|++;


=head1 DEPLOY

# approved by zhur
# .migr
[
{
    type => 'script',
    when => 'after',
    time_estimate => '1 час',
    comment => 'обновляем geo у кампаний и фраз',
},
{
    type => 'manual',
    when => 'after',
    text => 'запустить на ppcback01f
sudo -u ppc /var/www/ppc.yandex.ru/protected/mk_regions.pl --db
',
    time_estimate => '2 минуты',
},
]
=cut


# Region substitutions to apply.  Each key is a region id that may appear as
# an included ("plus") region in a geo string; the value is the list of ids
# it is replaced with.  A negative id in the list is an exclusion.
my %changed = (
    1004 => [ 1004, 983 ], # Middle East -> Middle East, Turkey
    111  => [ 111, -983 ], # Europe -> Europe, minus Turkey
    183  => [ 183, 983  ], # Asia -> Asia, Turkey
);

# SQL condition selecting rows whose comma-separated geo mentions any of the
# changed regions, either as an inclusion or (not in first position) as an
# exclusion.
my $changed_sql = join " OR ", 
    map { " geo = '$_' or geo like '$_,%' or geo like '%,$_' or geo like '%,$_,%' or geo like '%,-$_,%' or geo like '%,-$_'\n" } keys %changed;

# Number of pending row updates accumulated before they are written out.
my $batch_size = 1000;

$log->out('start');
my $sth = exec_sql(PPC,"select cid, geo from campaigns where $changed_sql");
$log->out('updating campaigns');
my %update;     # id => { geo => old value, new_geo => new value [, cid => ...] }
my $cnt = 0;
my $skip = 0;
while (my ($cid, $geo) = $sth->fetchrow_array()) {
    my $new_geo = new_geo($geo, \%changed);
    if ($new_geo ne $geo) {
        $update{$cid} = { geo => $geo, new_geo => $new_geo };
        $log->out("$cid: '$geo' => '$new_geo'");
        $cnt++;
    }
    else {
        $log->out("$cid: '$geo' skip");
        $skip++;
    }
    _flush_campaign_updates(\%update) if keys(%update) >= $batch_size;
}
_flush_campaign_updates(\%update);
$log->out("table campaigns: $cnt updated, $skip skipped");

$cnt = $skip = 0;
my $chunk = 1_000_000;
$log->out("updating phrases");
my ($min_pid, $max_pid) = get_one_line_array_sql(PPC,"select min(pid), max(pid) from phrases"); 
# Walk the pid space in consecutive, non-overlapping windows of $chunk ids.
# "<=" makes sure a table with a single pid (min == max) is still processed;
# the defined() check covers an empty table, where min()/max() are NULL.
while (defined $min_pid && $min_pid <= $max_pid) {
    my $last_pid = $min_pid + $chunk - 1;   # BETWEEN is inclusive on both ends
    $log->out("select min_pid = $min_pid");
    $sth = exec_sql(PPC,"select pid, cid, geo from phrases join banners using(bid) where ($changed_sql) and pid between $min_pid and $last_pid");
    while (my ($pid, $cid, $geo) = $sth->fetchrow_array()) {
        my $new_geo = new_geo($geo, \%changed);
        if ($new_geo ne $geo) {
            $update{$pid} = { geo => $geo, new_geo => $new_geo, cid => $cid };
            $log->out("$pid: '$geo' => '$new_geo'");
            $cnt++;
        }
        else {
            $log->out("$cid $pid: '$geo' skip");
            $skip++;
        }
        _flush_phrase_updates(\%update) if keys(%update) >= $batch_size;
    }
    $sth->finish;
    $min_pid += $chunk;
}
_flush_phrase_updates(\%update);
$log->out("phrases: $cnt updated, $skip skipped");
$log->out('end');

# _flush_campaign_updates(\%update)
#
# Writes out the pending campaign changes and empties the hash.
#   \%update: cid => { geo => <geo as read>, new_geo => <geo to store> }
# Every cid is inserted into bs_resync_queue, then campaigns.geo is set to
# the new value; the WHERE clause additionally requires geo to still equal
# the value that was read, so rows changed in the meantime are left alone.
# Does nothing when the hash is empty.
sub _flush_campaign_updates
{
    my ($update) = @_;
    return unless %$update;

    do_mass_insert_sql(PPC,"INSERT IGNORE INTO bs_resync_queue (cid) VALUES %s", [ map { [ $_ ] } keys %$update ]);
    do_sql(PPC, ["update campaigns set geo = ",
        sql_case(PPC, cid => hash_map { $_->{new_geo} } $update),
        WHERE => {
            cid => [ keys %$update ],
            geo__dont_quote => sql_case(PPC, cid => hash_map { $_->{geo} } $update),
        },
    ]);
    %$update = ();
    return;
}

# _flush_phrase_updates(\%update)
#
# Writes out the pending phrase changes and empties the hash.
#   \%update: pid => { geo => <geo as read>, new_geo => <geo to store>, cid => <campaign id> }
# Every (cid, pid) pair is inserted into bs_resync_queue, then phrases.geo is
# set to the new value; the WHERE clause additionally requires geo to still
# equal the value that was read.  Does nothing when the hash is empty.
sub _flush_phrase_updates
{
    my ($update) = @_;
    return unless %$update;

    do_mass_insert_sql(PPC,"INSERT IGNORE INTO bs_resync_queue (cid, pid) VALUES %s", [ map { [ $update->{$_}{cid}, $_ ] } keys %$update ]);
    do_sql(PPC, [ "update phrases set geo = ", 
        sql_case(PPC, pid => hash_map { $_->{new_geo} } $update),
        WHERE => {
            pid => [ keys %$update ],
            geo__dont_quote => sql_case(PPC, pid => hash_map { $_->{geo} } $update),
        },
    ]);
    %$update = ();
    return;
}

# new_geo($geo, \%changed) -> string
#
# Rewrites a comma-separated geo string according to the substitution table.
#
#   $geo      geo string: region ids separated by commas; an id prefixed
#             with "-" is an exclusion that belongs to the closest preceding
#             included id ("1,-2,-3,4,-5,6").
#   \%changed included region id => arrayref of ids that replace it
#             (negative ids are exclusions).  Only included ids are looked
#             up; exclusions from $geo are carried over as they are.
#
# While groups are processed left to right, a per-region balance is kept
# (+1 for each inclusion, -1 for each exclusion).  An id is dropped when its
# balance is zero or has the opposite sign, i.e. an inclusion and an
# exclusion of the same region cancel each other.  Duplicates are removed,
# keeping the first occurrence.  Returns the resulting geo string.
#
# The sign of an id is always taken from its leading "-", never from a
# numeric comparison, so that region id 0 counts as an inclusion and is not
# silently dropped from the result.
sub new_geo
{
    my ($geo, $changed) = @_;
    $geo = '' unless defined $geo;

    # Split into groups: "1,-2,-3,4,-5,6" => ( "1,-2,-3", "4,-5", "6" ).
    # NOTE(review): a group has to start with an unsigned id, so the "-" of
    # an exclusion standing at the very beginning of $geo is not captured.
    my @groups = $geo =~ /(\d+,?(?:-\d+,?)*)/g;
    s/,$// for @groups;

    my %balance;    # region id (without sign) => inclusions minus exclusions
    my @result;
    for my $group (@groups) {
        my @ids      = split /,/, $group;
        my @included = grep { !/^-/ } @ids;
        my @excluded = grep {  /^-/ } @ids;

        # Substitute included regions; keep the group's own exclusions last.
        my @expanded = map { @{ $changed->{$_} || [$_] } } @included;
        push @expanded, @excluded;

        for my $id (@expanded) {
            $balance{ abs $id } += ($id =~ /^-/) ? -1 : 1;
        }

        push @result, @expanded;
        # Re-filter everything collected so far: a later group may cancel
        # an id that was added by an earlier one.
        @result = grep {
            my $net = $balance{ abs $_ };
            /^-/ ? $net < 0 : $net > 0;
        } @result;
    }

    my %seen;
    return join ',', grep { !$seen{$_}++ } @result;
}

=begin testing

my %changed = (
    1004 => [ 1004, 983 ], # Ближний Восток -> Ближний Восток, Турция
    111  => [ 111, -983 ], # Европа -> Европа, -Турция 
    183  => [ 183, 983  ], # Азия -> Азия, Турция
);

is(new_geo('225', \%changed),'225');
is(new_geo('225,983', \%changed),'225,983');
is(new_geo('111,183', \%changed),'111,183');
is(new_geo('183,111', \%changed),'183,111');
is(new_geo('111', \%changed), '111,-983');
is(new_geo('983', \%changed), '983');
is(new_geo('183', \%changed), '183,983');
is(new_geo('1004,111', \%changed), '1004,111');
is(new_geo('111,983,183', \%changed), '111,183,983');

# real life examples
is(new_geo('1004,-1056,-181,-983', \%changed), '1004,-1056,-181');
is(new_geo('10174,-969,-10867,111,183,-169,-1056,-210,-983,-994,-995,10002', \%changed), '10174,-969,-10867,111,-983,183,-169,-1056,-210,-994,-995,10002');
is(new_geo('183,-983,111', \%changed),'183,111,-983');
is(new_geo('225,169,166,111,983', \%changed),'225,169,166,111');
is(new_geo('225,168,149,111,1004,-983,994,134,995,135,137,10002,10003', \%changed),'225,168,149,111,-983,1004,994,134,995,135,137,10002,10003');

=end testing


