#!/usr/bin/perl

use my_inc '..';

=head1 DESCRIPTION

Исправляем geo после слияния Балашихи и Жел-Дора, Королёва и Юбилейного.

=head1 DEPLOY

# approved by hrustyashko
# .migr
[
  {
    type => 'script',
    when => 'after',
    time_estimate => "8 минут",
    comment => "",
    script => "/var/www/ppc.yandex.ru/protected/mk_regions.pl --db"
  },
  {
    type => 'script',
    when => 'after',
    time_estimate => "6..12 часов",
    comment => "Можно останавливать и перезапускать. 
Когда запустится на продакшене -- внимательно смотреть на LA на ppcdata* (https://ppcgraphite.yandex.ru/grafana/dashboard/db/direct-ppcdata),
если видно заметное повышение -- скрипт прибивать и запускать с --sleep-coef 2, при необходимости увеличивать коэффициент."
  }
]

=cut

use warnings;
use strict;
use utf8;

use List::MoreUtils qw/part/;

use Settings;
use ScriptHelper;
use Yandex::DBTools;
use Yandex::DBShards;
use ShardingTools;
use Yandex::Retry;
use Test::More;
use BS::ResyncQueue;

# Value of --sleep-coef; handed to relaxed_guard() as "times" by the iterate_* subs.
my $SLEEP_COEF = 1;
# Value of --shard-id; when set, only that shard is processed.
my $SHARD;
# Set by --test: run the cleanup_geo() self-checks and exit before any table is scanned.
my $TEST;
# Step (in ids) by which the iterate_* subs advance through a table.
my $CHUNK_SIZE = 1_000_000;

# NOTE(review): extract_script_params() comes from ScriptHelper and presumably passes
# these specs to Getopt::Long -- confirm.
# "sleep-coef" must declare a value type: a bare option name is a boolean flag, so
# "--sleep-coef 2" would store 1 and the coefficient would never be applied.
# "=f" accepts both integer and fractional coefficients.
extract_script_params(
    'sleep-coef=f' => \$SLEEP_COEF,
    'shard-id=s' => \$SHARD,
    'test' => \$TEST
);

# Yubileyniy (from) is replaced by Korolev (to).
my $yubileyniy = 21620;
my $korolev = 20728;

# Zhel-Dor (from) is replaced by Balashiha (to).
my $zhel_dor = 21622;
my $balashiha = 10716;

# Self-test mode (--test): check cleanup_geo() against a table of known inputs and
# exit without scanning any table.
if ($TEST) {
    my $msk_region = 1;
    my $moscow = 213;
    my $spb = 2;

    # Each case is [input geo, expected geo]. Every check carries a description built
    # from its input, so a failing case can be identified from the TAP output alone.
    my @cases = (
        # Yubileyniy -> Korolev, included regions
        [ $yubileyniy,                      $korolev ],
        [ "$yubileyniy,$korolev",           "$yubileyniy,$korolev" ],
        [ "$yubileyniy,$zhel_dor",          "$korolev,$balashiha" ],
        [ "$zhel_dor,$yubileyniy",          "$korolev,$balashiha" ],
        [ "$yubileyniy,$korolev,$moscow",   "$yubileyniy,$korolev,$moscow" ],

        # Zhel-Dor -> Balashiha, included regions
        [ $zhel_dor,                        $balashiha ],
        [ "$zhel_dor,$balashiha",           "$zhel_dor,$balashiha" ],
        [ "$zhel_dor,$balashiha,$spb",      "$zhel_dor,$balashiha,$spb" ],

        # Yubileyniy / Korolev, excluded regions
        [ "-$yubileyniy,-$korolev",             "-$yubileyniy,-$korolev" ],
        [ "$msk_region,-$yubileyniy,-$korolev", "$msk_region,-$yubileyniy,-$korolev" ],
        # Order is preserved
        [ "-$yubileyniy,-$korolev,$msk_region", "-$yubileyniy,-$korolev,$msk_region" ],
        [ "$korolev,-$yubileyniy",              "$korolev" ],
        [ "$msk_region,$korolev,-$yubileyniy",  "$msk_region,$korolev" ],

        # Zhel-Dor / Balashiha, excluded regions
        [ "-$zhel_dor,-$balashiha",             "-$zhel_dor,-$balashiha" ],
        [ "$msk_region,-$zhel_dor,-$balashiha", "$msk_region,-$zhel_dor,-$balashiha" ],
        [ "$balashiha,-$zhel_dor",              "$balashiha" ],
        [ "$msk_region,$balashiha,-$zhel_dor",  "$msk_region,$balashiha" ],

        # Both merges in one geo
        [ "$balashiha,-$zhel_dor,$korolev,-$yubileyniy",             "$balashiha,$korolev" ],
        [ "$msk_region,$balashiha,-$zhel_dor,$korolev,-$yubileyniy", "$msk_region,$balashiha,$korolev" ],
        [ "$balashiha,$korolev,-$zhel_dor,-$yubileyniy",             "$balashiha,$korolev" ],
        [ "$msk_region,$balashiha,$korolev,-$zhel_dor,-$yubileyniy", "$msk_region,$balashiha,$korolev" ],
    );

    for my $case (@cases) {
        my ($input, $expected) = @$case;
        is(cleanup_geo($input), $expected, "cleanup_geo('$input')");
    }

    done_testing;
    exit 0;
}

# Shards to process: only the one given with --shard-id, otherwise everything
# ppc_shards() returns.
my %shards = map { $_ => 1 } ($SHARD ? $SHARD : ppc_shards());

$log->out('START');

# NOTE(review): foreach_shard_parallel_verbose presumably invokes the callback once per
# shard -- confirm against ShardingTools. Shards that were not requested return at once.
foreach_shard_parallel_verbose(
    $log,
    sub {
        my ($shard) = @_;
        $shards{$shard} or return;
        my $log_guard = $log->msg_prefix_guard("[shard $shard]");

        # The tables are handled one after another, in this fixed order.
        my @steps = (
            \&iterate_adgroups,
            \&iterate_campaigns,
            \&iterate_mediaplans,
            \&iterate_media_groups,
        );
        for my $step (@steps) {
            $step->($shard);
        }

        $log->out("Working");
    }
);

$log->out('FINISH');

# Rewrites a comma-separated geo string so that the absorbed regions ($yubileyniy,
# $zhel_dor) are expressed through the regions they were merged into ($korolev,
# $balashiha).
#
# Params:
#   $geo - comma-separated region ids; an id prefixed with "-" is an excluded region
# Returns the new geo string:
#   * an excluded absorbed region is dropped when its target region is included;
#   * an included absorbed region whose target is not included is replaced by the
#     target, which is put at the front of the result;
#   * every other id is kept, in its original order.
sub cleanup_geo {
    my ($geo) = @_;

    my @tokens = split /,/, $geo;

    # Ids that are still allowed to appear in the result; excluded ids are keyed
    # without their minus sign.
    my (%keep_plus, %keep_minus);
    for my $token (@tokens) {
        if ($token =~ /^-/) {
            $keep_minus{-$token} = 1;
        }
        else {
            $keep_plus{$token} = 1;
        }
    }

    # Target regions that stand in for a removed absorbed region.
    my @replacements;
    for my $merge ([$yubileyniy, $korolev], [$zhel_dor, $balashiha]) {
        my ($from, $to) = @$merge;
        if ($keep_plus{$to}) {
            # The target is already included, so excluding the absorbed part is moot.
            delete $keep_minus{$from};
        }
        elsif ($keep_plus{$from}) {
            delete $keep_plus{$from};
            push @replacements, $to;
        }
    }

    my @survivors = grep { /^-/ ? $keep_minus{-$_} : $keep_plus{$_} } @tokens;

    return join(",", @replacements, @survivors);
}

# Fixes geo in the phrases table of one shard and passes the changed adgroups to
# bs_resync().
#
# Walks the pid range in steps of $CHUNK_SIZE; in every window selects the rows whose
# geo mentions an absorbed region, runs them through cleanup_geo() and writes back only
# the values that actually changed.
#
# Params:
#   $shard - shard number, passed to PPC(shard => ...)
# Logs progress after every window and the number of changed adgroups at the end.
sub iterate_adgroups {
    my $shard = shift;

    # Geo lists that contain an absorbed region id, with or without a leading minus.
    my $geo_rlike = "(^|,)-?($yubileyniy|$zhel_dor)(,|\$)";

    my $min = get_one_field_sql(PPC(shard => $shard), "select min(pid) from phrases");
    my $max = get_one_field_sql(PPC(shard => $shard), "select max(pid) from phrases");
    # No bounds (presumably an empty table): use an empty range so the loop is skipped.
    ($min, $max) = (1, 0) unless defined $min && defined $max;

    my $count = 0;
    # "<=" rather than "<": a table holding a single pid has min == max and must still
    # be scanned.
    while ($min <= $max) {
        # NOTE(review): relaxed_guard (Yandex::Retry) presumably pauses the loop in
        # proportion to "times" when the guard is destroyed -- confirm.
        my $relax = relaxed_guard(times => $SLEEP_COEF);
        my $batch = get_all_sql(PPC(shard => $shard), [
            "select pid, cid, geo from phrases",
            where => {
                pid__between => [$min, $min + $CHUNK_SIZE],
                geo__rlike => $geo_rlike,
            },
        ]);
        my @changed;
        for my $adgroup (@$batch) {
            my $new_geo = cleanup_geo($adgroup->{geo});
            next if $new_geo eq $adgroup->{geo};
            $adgroup->{geo} = $new_geo;
            # Travels with the row into bs_resync(); presumably the resync priority.
            $adgroup->{priority} = -30;
            push @changed, $adgroup;
        }
        if (@changed) {
            # The pattern is checked again in the update, so a row whose geo stopped
            # matching since the select is left alone.
            do_update_table(PPC(shard => $shard), 'phrases', {
                geo__dont_quote => sql_case('pid', {map { $_->{pid} => $_->{geo} } @changed})
            }, where => {
                pid => [ map { $_->{pid} } @changed ],
                geo__rlike => $geo_rlike,
            });
            bs_resync(\@changed);
            $count += @changed;
        }
        $min += $CHUNK_SIZE;
        $log->out(sprintf "looked for pid %d/%d", $min, $max);
    }
    $log->out("Changed $count adgroups");
}

# Fixes geo in the campaigns table of one shard.
#
# Walks the cid range in steps of $CHUNK_SIZE; in every window selects the rows whose
# geo mentions an absorbed region, runs them through cleanup_geo() and writes back only
# the values that actually changed.
#
# Params:
#   $shard - shard number, passed to PPC(shard => ...)
# Logs progress after every window and the number of changed campaigns at the end.
sub iterate_campaigns {
    my $shard = shift;

    # Geo lists that contain an absorbed region id, with or without a leading minus.
    my $geo_rlike = "(^|,)-?($yubileyniy|$zhel_dor)(,|\$)";

    my $min = get_one_field_sql(PPC(shard => $shard), "select min(cid) from campaigns");
    my $max = get_one_field_sql(PPC(shard => $shard), "select max(cid) from campaigns");
    # No bounds (presumably an empty table): use an empty range so the loop is skipped.
    ($min, $max) = (1, 0) unless defined $min && defined $max;

    my $count = 0;
    # "<=" rather than "<": a table holding a single cid has min == max and must still
    # be scanned.
    while ($min <= $max) {
        # NOTE(review): relaxed_guard (Yandex::Retry) presumably pauses the loop in
        # proportion to "times" when the guard is destroyed -- confirm.
        my $relax = relaxed_guard(times => $SLEEP_COEF);
        my $batch = get_all_sql(PPC(shard => $shard), [
            "select cid, geo from campaigns",
            where => {
                cid__between => [$min, $min + $CHUNK_SIZE],
                geo__rlike => $geo_rlike,
            },
        ]);
        my @changed;
        for my $camp (@$batch) {
            my $new_geo = cleanup_geo($camp->{geo});
            next if $new_geo eq $camp->{geo};
            $camp->{geo} = $new_geo;
            push @changed, $camp;
        }
        if (@changed) {
            # The pattern is checked again in the update, so a row whose geo stopped
            # matching since the select is left alone.
            do_update_table(PPC(shard => $shard), 'campaigns', {
                geo__dont_quote => sql_case('cid', {map { $_->{cid} => $_->{geo} } @changed})
            }, where => {
                cid => [ map { $_->{cid} } @changed ],
                geo__rlike => $geo_rlike,
            });
            $count += @changed;
        }
        $min += $CHUNK_SIZE;
        $log->out(sprintf "looked for cid %d/%d", $min, $max);
    }
    $log->out("Changed $count campaigns");
}

# Fixes geo in the mediaplan_banners table of one shard.
#
# Walks the mbid range in steps of $CHUNK_SIZE; in every window selects the rows whose
# geo mentions an absorbed region, runs them through cleanup_geo() and writes back only
# the values that actually changed.
#
# Params:
#   $shard - shard number, passed to PPC(shard => ...)
# Logs progress after every window and the number of changed banners at the end.
sub iterate_mediaplans {
    my $shard = shift;

    # Geo lists that contain an absorbed region id, with or without a leading minus.
    my $geo_rlike = "(^|,)-?($yubileyniy|$zhel_dor)(,|\$)";

    my $min = get_one_field_sql(PPC(shard => $shard), "select min(mbid) from mediaplan_banners");
    my $max = get_one_field_sql(PPC(shard => $shard), "select max(mbid) from mediaplan_banners");
    # No bounds (presumably an empty table): use an empty range so the loop is skipped.
    ($min, $max) = (1, 0) unless defined $min && defined $max;

    my $count = 0;
    # "<=" rather than "<": a table holding a single mbid has min == max and must still
    # be scanned.
    while ($min <= $max) {
        # NOTE(review): relaxed_guard (Yandex::Retry) presumably pauses the loop in
        # proportion to "times" when the guard is destroyed -- confirm.
        my $relax = relaxed_guard(times => $SLEEP_COEF);
        my $batch = get_all_sql(PPC(shard => $shard), [
            "select mbid, geo from mediaplan_banners",
            where => {
                mbid__between => [$min, $min + $CHUNK_SIZE],
                geo__rlike => $geo_rlike,
            },
        ]);
        # mbid => new geo, only for rows whose geo really changes.
        my %changes;
        for my $banner (@$batch) {
            my $new_geo = cleanup_geo($banner->{geo});
            next if $new_geo eq $banner->{geo};
            $changes{$banner->{mbid}} = $new_geo;
            $count++;
        }
        if (%changes) {
            # The pattern is checked again in the update, so a row whose geo stopped
            # matching since the select is left alone.
            do_update_table(PPC(shard => $shard), 'mediaplan_banners', {
                geo__dont_quote => sql_case(mbid => \%changes)
            }, where => { mbid => [keys %changes], geo__rlike => $geo_rlike });
        }
        $min += $CHUNK_SIZE;
        $log->out(sprintf "looked for mbid %d/%d", $min, $max);
    }
    $log->out("Changed $count mediaplan banners");
}

# Fixes geo in the media_groups table of one shard.
#
# Walks the mgid range in steps of $CHUNK_SIZE; in every window selects the rows whose
# geo mentions an absorbed region, runs them through cleanup_geo() and writes back only
# the values that actually changed.
#
# Params:
#   $shard - shard number, passed to PPC(shard => ...)
# Logs progress after every window and the number of changed media groups at the end.
sub iterate_media_groups {
    my $shard = shift;

    # Geo lists that contain an absorbed region id, with or without a leading minus.
    my $geo_rlike = "(^|,)-?($yubileyniy|$zhel_dor)(,|\$)";

    my $min = get_one_field_sql(PPC(shard => $shard), "select min(mgid) from media_groups");
    my $max = get_one_field_sql(PPC(shard => $shard), "select max(mgid) from media_groups");
    # No bounds (presumably an empty table): use an empty range so the loop is skipped.
    ($min, $max) = (1, 0) unless defined $min && defined $max;

    my $count = 0;
    # "<=" rather than "<": a table holding a single mgid has min == max and must still
    # be scanned.
    while ($min <= $max) {
        # NOTE(review): relaxed_guard (Yandex::Retry) presumably pauses the loop in
        # proportion to "times" when the guard is destroyed -- confirm.
        my $relax = relaxed_guard(times => $SLEEP_COEF);
        my $batch = get_all_sql(PPC(shard => $shard), [
            "select mgid, geo from media_groups",
            where => {
                mgid__between => [$min, $min + $CHUNK_SIZE],
                geo__rlike => $geo_rlike,
            },
        ]);
        # mgid => new geo, only for rows whose geo really changes.
        my %changes;
        for my $group (@$batch) {
            my $new_geo = cleanup_geo($group->{geo});
            next if $new_geo eq $group->{geo};
            $changes{$group->{mgid}} = $new_geo;
            $count++;
        }
        if (%changes) {
            # The pattern is checked again in the update, so a row whose geo stopped
            # matching since the select is left alone.
            do_update_table(PPC(shard => $shard), 'media_groups', {
                geo__dont_quote => sql_case(mgid => \%changes)
            }, where => { mgid => [keys %changes], geo__rlike => $geo_rlike });
        }
        $min += $CHUNK_SIZE;
        $log->out(sprintf "looked for mgid %d/%d", $min, $max);
    }
    $log->out("Changed $count media groups");
}
