#!/usr/bin/perl

=head1 DEPLOY

# .migr
{
  tasks => [
    {
      type => 'script',
      when => 'after',
      time_estimate => 'примерно 6 дней в один поток, 1 час на 10_000 записей',
      comment => 'запускать по просьбе santama@',
    }
  ],
  approved_by => 'hrustyashko'
}

=cut

# Migrate only the first image
# deploy/20180420_banner_images_formats_super_resolution.pl --limit 1 --offset 0

# Run in parallel in batches of 50000 images,
# where 1292000 is the total number of rows in the YT table
# LIMIT=50000; for offset in `seq 0 ${LIMIT} 1292000`; do deploy/20180420_banner_images_formats_super_resolution.pl --limit $LIMIT --offset $offset &>/dev/null &disown; done

# Run in parallel with one process per shard to load the shards evenly,
# because the data is distributed unevenly across shards: https://yql.yandex-team.ru/Operations/WuglBTf2fwcT3qHxZTRbYG90cIU-fK14DiJtmLbj2Rw=
# for shard in `seq 1 1 15`; do deploy/20180420_banner_images_formats_super_resolution.pl --shard $shard &>/dev/null &disown; done

# Query that populates //home/direct/test/santama/banner_images_formats_super_resolution_sharded
# https://yql.yandex-team.ru/Operations/WuSv33LzZXu8bULuatB7B--QGh5dtL-eXmBFQioLas8=

use my_inc '..';
use Direct::Modern;

use BS::ResyncQueue qw/bs_resync/;
use Moderate::ResyncQueue qw/mod_resync/;
use Tools;
use Getopt::Long;
use Settings;
use Yandex::DBShards;
use Yandex::DBTools;
use Yandex::HashUtils qw/hash_cut/;
use Yandex::ListUtils qw/chunks/;
use Yandex::Retry qw/relaxed_guard/;
use Yandex::YT::Table;

# Run parameters. Every default below can be overridden from the command line
# through the option table passed to extract_script_params.
my $OFFSET     = 0;         # index of the first YT table row to read
my $LIMIT      = 0;         # number of rows to read; 0 leaves the row range open-ended
my $DRYRUN     = 0;         # when set, no rows are written: only counts are logged
my $REMODERATE = 0;         # copied into the "remoderate" field of moderation resync objects
my $REVERT     = 0;         # when set, image_hash and parent_image_hash swap roles
my $YT_CLUSTER = 'banach';
my $CHUNK_SIZE = 1000;      # number of rows buffered before a batch is processed
my $SHARD;                  # when defined, only rows whose "shard" field equals it are processed
my $PRIORITY   = BS::ResyncQueue::PRIORITY_ONE_SHOT_FIX_MISSING_IMAGE_FORMATS;
my $TABLE      = '//home/direct/test/santama/banner_images_formats_super_resolution_sharded';

extract_script_params(
    # which rows to take
    'table=s'     => \$TABLE,
    'cluster=s'   => \$YT_CLUSTER,
    'offset=i'    => \$OFFSET,
    'limit=i'     => \$LIMIT,
    'shard=s'     => \$SHARD,
    'chunksize=i' => \$CHUNK_SIZE,

    # what to do with them
    'dry'         => \$DRYRUN,
    'revert'      => \$REVERT,
    'remoderate'  => \$REMODERATE,
    'priority=i'  => \$PRIORITY,
);

# The log file name carries the shard (only when one was requested) and the
# offset. $OFFSET defaults to 0, so it is always defined and the offset suffix
# is always present.
my $log_name_suffix = '';
$log_name_suffix .= ".shard_${SHARD}"   if defined $SHARD;
$log_name_suffix .= ".offset_${OFFSET}" if defined $OFFSET;

my $logger = Yandex::Log->new(
    log_file_name => "20180420_banner_images_formats_super_resolution${log_name_suffix}.log",
    date_suf      => '%Y%m%d',
);

$logger->out('START');

# Run-wide counters, incremented by the worker subs and reported at the end.
my $TOTAL_INSERTED_BANNER_IMAGES_FORMATS = 0;
my $TOTAL_BANNER_IMAGES_UPDATED          = 0;
my $TOTAL_SYNCED_BANNERS                 = 0;

# Record the restrictions this particular run was started with.
$logger->out("limit on images amount is given - $LIMIT") if $LIMIT;
$logger->out("offset is given - $OFFSET")                if $OFFSET;
$logger->out("shard is given - $SHARD")                  if $SHARD;

# Main loop: read the source rows from the YT table and process them in
# batches of $CHUNK_SIZE rows.
Tools::force_set_yt_environment($YT_CLUSTER);

my $table = Yandex::YT::Table->new($TABLE);
unless ($table->exists()) {
    $logger->die("table $TABLE doesn't exists");
}

my @image_list_buffer = ();

# Row range handed to the reader: "[#OFFSET:#OFFSET+LIMIT]", or "[#OFFSET:]"
# (no upper bound) when no limit was given.
my $END_ROW_NUM = $LIMIT > 0 ? '#' . ($OFFSET + $LIMIT) : '';
my $reader = $table->reader(undef, "[#$OFFSET:${END_ROW_NUM}]", format => "json");
while ( my $r = $reader->next() ) {

    # With --shard only the rows of that shard are taken.
    next if defined $SHARD && $SHARD ne $r->{shard};

    push @image_list_buffer, $r;

    # Flush a full batch and start collecting the next one.
    if ( @image_list_buffer >= $CHUNK_SIZE ) {
        proccess_images( \@image_list_buffer );
        @image_list_buffer = ();
    }

}

# Process the last, incomplete batch.
if ( @image_list_buffer ) {
    proccess_images( \@image_list_buffer );
}

# 'FINISH' is intentionally not logged here: the totals are reported further
# down in the file, followed by the single 'FINISH' marker.

# Processes one batch of YT rows.
#
# $images - arrayref of row hashes; each row carries a "shard" field.
#
# Rows are grouped by shard, and for every shard present in the batch three
# steps run in order: insert the new formats rows, repoint banner_images to
# them, and queue the affected banners for resync.
sub proccess_images {
    my $images = shift;

    my %images_by_shard;
    for my $image (@$images) {
        push @{ $images_by_shard{ $image->{shard} } }, $image;
    }

    foreach_shard(shard => [ keys %images_by_shard ], sub {
        my ($shard) = @_;

        my $shard_images = $images_by_shard{$shard};

        $logger->out( sprintf( 'shard #%s, %s images to insert', $shard, scalar @$shard_images ) );

        # Guard object from Yandex::Retry, held until the end of this callback.
        # NOTE(review): presumably throttles the script relative to the time
        # the work took - confirm against Yandex::Retry.
        my $rg = relaxed_guard(times => 1);

        insert_super_resolution_images( $shard, $shard_images );
        update_banner_images( $shard, $shard_images );
        resend_by_image_hash( $shard, $shard_images );
    });
}

# Final summary: one log line per run-wide counter, then the end marker.
for my $summary_line (
    [ 'total %s banner_images_formats rows inserted', $TOTAL_INSERTED_BANNER_IMAGES_FORMATS ],
    [ 'total %s banner_images rows updated',          $TOTAL_BANNER_IMAGES_UPDATED ],
    [ 'total %s banners synced',                      $TOTAL_SYNCED_BANNERS ],
) {
    my ( $format, $count ) = @$summary_line;
    $logger->out( sprintf( $format, $count ) );
}

$logger->out('FINISH');


# Inserts the given rows into banner_images_formats of one shard.
#
# $shard  - shard number
# $images - arrayref of row hashes with the keys image_hash, mds_group_id,
#           namespace, image_type, width, height, formats and avatars_host
#
# Rows that hit an existing key are left as they are (the "on duplicate key
# update" clause assigns image_hash to itself).
# In dry-run mode nothing is written and the number of rows given is used as
# the result.
#
# Returns the result of the mass insert (or the row count in dry-run mode) and
# adds it to $TOTAL_INSERTED_BANNER_IMAGES_FORMATS; returns nothing for an
# empty list.
sub insert_super_resolution_images {
    my ( $shard, $images ) = @_;

    my $images_count = scalar @$images;
    return unless $images_count;

    my $inserted = $images_count;
    unless ($DRYRUN) {
        # Column order must match the column list in the statement below.
        my @columns = qw/image_hash mds_group_id namespace image_type width height formats avatars_host/;
        my @rows    = map { [ @{$_}{@columns} ] } @$images;

        $inserted = do_mass_insert_sql(PPC(shard => $shard),
            'insert into banner_images_formats (image_hash, mds_group_id, namespace, image_type, width, height, formats, avatars_host) values %s
            on duplicate key update image_hash = image_hash', \@rows);
    }

    $TOTAL_INSERTED_BANNER_IMAGES_FORMATS += $inserted;

    return $inserted;
}

# Repoints banner_images (and banner_images_pool) rows of one shard from the
# current image hash to the target one.
#
# $shard  - shard number
# $images - arrayref of row hashes with the keys image_hash and parent_image_hash
#
# Normal mode:  parent_image_hash -> image_hash.
# Revert mode:  image_hash -> parent_image_hash.
#
# In dry-run mode nothing is written; instead the number of distinct hashes
# in banner_images that would be replaced is counted and logged. The count
# uses the same "current" hash as the real update, so it is also correct
# together with --revert.
#
# The result is added to $TOTAL_BANNER_IMAGES_UPDATED. Nothing is done for an
# empty list. The caller in this file does not use the return value.
sub update_banner_images {
    my ( $shard, $images ) = @_;

    my $size = scalar @$images;

    return unless $size;

    # Which field holds the hash currently stored in the database and which
    # one holds the hash it must be replaced with.
    my ( $current_key, $target_key ) = $REVERT
        ? ( 'image_hash',        'parent_image_hash' )
        : ( 'parent_image_hash', 'image_hash' );

    my $res;
    if ( !$DRYRUN ) {
        $logger->out({'update_banner_images' => [ map { hash_cut($_, qw/image_hash parent_image_hash/) } @$images ]});

        # current hash => { image_hash => target hash }
        my %new_hash_for = map { ( $_->{$current_key} => { image_hash => $_->{$target_key} } ) } @$images;

        $res = do_mass_update_sql(PPC(shard => $shard), 'banner_images', 'image_hash', \%new_hash_for);
        # NOTE(review): the "ignore " prefix in the table name presumably turns
        # the statement into UPDATE IGNORE - confirm against do_mass_update_sql.
        do_mass_update_sql(PPC(shard => $shard), 'ignore banner_images_pool', 'image_hash', \%new_hash_for);
    } else {
        $res = get_one_line_sql(PPC(shard => $shard), ['SELECT count(distinct image_hash) cnt FROM banner_images', WHERE => {
                    image_hash => [ map { $_->{$current_key} } @$images ],
                }])->{cnt};
        $logger->out( sprintf('shard #%s, is to update %s images', $shard, $res ));
    }
    $TOTAL_BANNER_IMAGES_UPDATED += $res;

}

# Finds the non-archived banners (in non-archived campaigns) of one shard that
# use the given images and queues them for resync.
#
# $shard        - shard number
# $shard_images - arrayref of row hashes with the keys image_hash and
#                 parent_image_hash
#
# Banners are looked up by the hash banner_images holds at this point:
#   - real run: the new hash, since this sub is called after update_banner_images;
#   - dry run:  the old hash, since nothing was updated.
# With --revert the "new" hash is parent_image_hash and the "old" one is image_hash.
#
# In a real run, for every chunk of up to 1000 banners: LastChange is set to
# NOW(), the bids are added to auto_moderate, and the banners are passed to
# bs_resync and mod_resync. In a dry run only the number of banners is logged.
# The number of banners found is added to $TOTAL_SYNCED_BANNERS.
sub resend_by_image_hash
{
    my ($shard, $shard_images) = @_;

    my ( $new_hash_key, $old_hash_key ) = $REVERT
        ? ( 'parent_image_hash', 'image_hash' )
        : ( 'image_hash',        'parent_image_hash' );
    my $lookup_key    = $DRYRUN ? $old_hash_key : $new_hash_key;
    my @lookup_hashes = map { $_->{$lookup_key} } @$shard_images;

    my %banner_filter = (
        'bim.image_hash'   => \@lookup_hashes,
        'b.statusArch__ne' => 'Yes',
        'c.archived__ne'   => 'Yes',
    );

    my $banners = get_all_sql( PPC(shard => $shard),
        [
            "SELECT b.cid, b.bid, 0 as pid, $PRIORITY as priority
             FROM banner_images bim JOIN banners b USING(bid) JOIN campaigns c USING(cid)",
            where => \%banner_filter,
        ],
    );

    my $banners_count = scalar @$banners;
    if ($DRYRUN) {
        $logger->out( sprintf('shard #%s, is to resync %s banners', $shard, $banners_count ));
    } else {
        for my $chunk (chunks($banners, 1_000)) {
            $logger->out({resync_queue => $chunk});

            my @bids = map { $_->{bid} } @$chunk;

            # Mark the banners as changed.
            do_update_table(PPC(shard => $shard), 'banners',
                { 'LastChange__dont_quote' => 'NOW()' },
                where => { 'bid' => \@bids });

            # One single-column row per banner.
            do_mass_insert_sql(PPC(shard => $shard), 'INSERT IGNORE INTO auto_moderate(bid) VALUES %s',
                [ map { [ $_ ] } @bids ]);

            BS::ResyncQueue::bs_resync($chunk);

            my @mod_objects = map {
                +{
                    id         => $_,
                    type       => 'banner',
                    priority   => 50,
                    remoderate => $REMODERATE,
                }
            } @bids;
            Moderate::ResyncQueue::mod_resync(\@mod_objects, log => $logger);
        }

        $logger->out('banners added to resync_queue: ' . $banners_count);
    }
    $TOTAL_SYNCED_BANNERS += $banners_count;
}

# Parses the command line (@ARGV) with Getopt::Long.
#
# Arguments: a list of "option specification => reference" pairs, passed to
# GetOptions unchanged.
#
# Returns 1 on success. Dies when GetOptions reports a failure.
sub extract_script_params {
    my %options = @_;

    # GetOptions signals failure through its return value and reports the
    # details itself via warn. It does not set $!, so errno text is not
    # included in the message.
    Getopt::Long::GetOptions(%options)
        or die "Getopt error: invalid command line options; stop";

    return 1;
}

