#!/usr/bin/perl

use my_inc '..';

=head1 DEPLOY

# approved by pankovpv
# .migr
{
  type => 'script',
  when => 'after',
  time_estimate => "20-30 минут в продакшене, час-полтора на девтест",
  comment => "при падении можно перезапускать"
}

=cut

use warnings;
use strict;
use utf8;

use Yandex::DBTools;
use Yandex::ListUtils;

use Settings;
use ScriptHelper;

use GeoTools;
use ShardingTools;

# Tables whose `geo` column is cleaned up by this migration.
#   key                        - name of the id column used for range scans and updates
#   need_update_statusBsSynced - when true, statusBsSynced is set to 'No' together with geo
#                                (presumably to force a re-sync of the changed rows - confirm)
my %tables = (
    campaigns => { key => 'cid', },
    phrases   => { key => 'pid', need_update_statusBsSynced => 1, },
    mediaplan_banners => { key => 'mbid', },
);
# Width of the id range covered by one SELECT (a range of ids, not a row count).
my $SELECT_CHUNK_SIZE = 5_000_000;
# Passed as the third argument to validate_geo().
my $VALIDATE_OPTS = {tree => 'api'};

$log->out('START');

for my $shard (ppc_shards) {
    $log->msg_prefix("[shard $shard]");
    # Sorted iteration keeps the processing order (and therefore the log) identical between runs.
    for my $table (sort keys %tables) {
        my $spec = $tables{$table};
        $log->out("Get data from $table");
        my $key = $spec->{key};
        my ($min, $max) = get_one_line_array_sql(PPC(shard => $shard), "select min($key), max($key) from $table");
        my ($fixed, $skip, $to_fix) = (0, 0, 0);

        # min()/max() are undefined for an empty table: skip the scan instead of
        # comparing undefined values, but still report the (zero) statistics below.
        my $has_rows = defined $min && defined $max;

        # "<=" (not "<"): the chunk starting exactly at $max must be scanned as well,
        # otherwise the row with the largest id is missed whenever $max == $min + k * chunk size
        # (including the single-row case $min == $max).
        while ($has_rows && $min <= $max) {
            my $chunk_end = $min + $SELECT_CHUNK_SIZE - 1;
            # old geo value => { new_geo => cleaned value, id => [ids of rows having that old value] }
            my %update;

            $log->out("$key: $min " . $chunk_end);
            # Only rows whose non-empty geo contains something besides digits, commas and minus signs.
            my $rows = get_all_sql(PPC(shard => $shard), ["select $key, geo from $table", where => {
                geo__is_not_null => 1,
                geo__ne => '',
                _TEXT => q!geo not regexp '^[0-9,-]+$'!,
                "${key}__between" => [ $min, $chunk_end ],
            }]);
            $log->out("selected ".(scalar @$rows)." rows");

            for my $row (@$rows) {
                # Drop every character that is not a digit, a comma or a minus sign.
                my $new_geo = $row->{geo} =~ s/[^0-9,-]//gr;
                # A true return value from validate_geo() is treated as an error text;
                # such rows are only logged and counted, never updated.
                if (my $err = validate_geo($new_geo, undef, $VALIDATE_OPTS)) {
                    $log->out("ERROR: $err for geo: $row->{geo}");
                    ++$skip;
                } else {
                    ++$to_fix;
                    $update{ $row->{geo} } //= {new_geo => $new_geo};
                    push @{ $update{ $row->{geo} }->{id} }, $row->{$key};
                }
            }

            for my $old_geo (sort keys %update) {
                my $data = $update{$old_geo};
                # Update in batches of 1000 ids to keep each statement small.
                for my $chunk (chunks($data->{id}, 1_000)) {
                    $log->out({UPDATE => $table, old_geo => $old_geo, new_geo => $data->{new_geo}, $key => $chunk});
                    # "geo => $old_geo" in the condition protects rows whose geo was changed
                    # by someone else after the SELECT above.
                    my $res = do_update_table(PPC(shard => $shard), $table,
                                              {
                                                geo => $data->{new_geo},
                                                ($spec->{need_update_statusBsSynced} ? (statusBsSynced => 'No') : ()),
                                              },
                                              where => {
                                                geo => $old_geo,
                                                $key => $chunk,
                                              });
                    # $res is accumulated as the number of rows actually changed
                    # (assumes do_update_table returns an affected-row count - confirm).
                    $fixed += $res;
                }
            }
            $min += $SELECT_CHUNK_SIZE;
        }
        $log->out("STAT: $table: fixed $fixed, to_fix $to_fix, skipped $skip");
    }
}
$log->msg_prefix(undef);

$log->out('FINISH');

