#!/usr/bin/perl

=head1 DEPLOY

# approved by hrustyashko
# .migr
{
    type => 'script',
    when => 'after',
    time_estimate => 'примерно 1 минута',
    comment => 'запускать так deploy/20170801_fix_banners_href.pl --data-file /var/www/ppc.yandex.ru/deploy/20170801_fix_banners_href.tsv',
}

=cut

=for
    YQL запрос для сбора данных для миграции:

        use hahn;

        select bid
        from hahn.[home/direct/db/banners]
        WHERE href REGEXP '\\n'
        ORDER BY bid ASC;
=cut

use Direct::Modern;

use my_inc '..';

use Moderate::ResyncQueue;
use URLDomain qw/ clear_banner_href /;
use Settings;
use ScriptHelper;

use Yandex::DBShards qw/ sharded_chunks /;
use Yandex::DBTools qw/ get_hash_sql do_mass_update_sql sql_case sql_quote_identifier /;
use Yandex::ListUtils qw/ nsort /;
use Yandex::Retry qw/ relaxed /;
use Yandex::Validate qw/ is_valid_id /;


# The UPDATE statement uses a CASE expression in its WHERE clause that embeds
# the old href of every banner in the chunk, so the query text can get very
# long; that is why the chunk size is kept this small.
my $CHUNK_SIZE = 1_000;

# Script options (see extract_script_params below).
my $DATA_PATH;            # --data-file:  path to the file with banner ids
my $SLEEP_COEF = 0.5;     # --sleep-coef: passed to relaxed() as "times"
my $VERBOSE    = 0;       # --verbose:    log intermediate data structures
my $DRYRUN     = 0;       # --dry:        do not write to the DB or the resync queue

# Parameters of the moderation resync queue entries.
my $PRIORITY   = 50;
my $TYPE       = 'banner';
my $REMODERATE = 0;

extract_script_params(
    'data-file=s'  => \$DATA_PATH,
    'sleep-coef=f' => \$SLEEP_COEF,
    'verbose'      => \$VERBOSE,
    'dry'          => \$DRYRUN,
    # The option specs follow Getopt::Long syntax: without "=i" the option is a
    # boolean flag, so "--priority 70" would set $PRIORITY to 1 and leave "70"
    # unparsed. The priority is a number, so it must take an integer value.
    'priority=i'   => \$PRIORITY,
    'remoderate'   => \$REMODERATE,
);


# Fail early with a clear message instead of an undef warning followed by a
# confusing "Can't open" error later on.
unless ( defined $DATA_PATH && length $DATA_PATH ) {
    die "No data file given, use --data-file <path>!\n";
}

unless ( Moderate::ResyncQueue::is_valid_priority( $PRIORITY ) ) {
    die "Invalid priority $PRIORITY given!";
}

unless ( Moderate::ResyncQueue::is_valid_remoderate( $REMODERATE ) ) {
    die "Invalid remoderate $REMODERATE given!";
}


# Main flow: read banner ids from the data file, split them into per-shard
# chunks and, for every chunk, fix the hrefs in the DB and put the changed
# banners into the moderation resync queue.
$log->out('START');
$log->out( sprintf( 'File name %s' => $DATA_PATH ) );

my $bids_from_file = get_data_from_file( $DATA_PATH );

my $chunk_count           = 0;
my $total_updated_banners = 0;

CHUNK:
for my $chunk ( sharded_chunks( bid => $bids_from_file, chunk_size => $CHUNK_SIZE, with_undef_shard => 1 ) ) {
    $chunk_count += 1;

    my $chunk_bids = $chunk->{bid};

    # chunks that come back without a shard value are marked with -1
    my $shard_id = defined $chunk->{shard} ? $chunk->{shard} : -1;

    $log->out( sprintf( 'Work on chunk %s, shard %s, size %s' => $chunk_count, $shard_id, scalar @$chunk_bids ) );

    # presumably prefixes every following log message until $guard goes out of
    # scope at the end of the iteration - confirm against the logger
    my $guard = $log->msg_prefix_guard("[chunk $chunk_count]");

    # A false shard and a missing shard are both skipped, with different reasons.
    my $skip_format =
          !$shard_id      ? "Can't guess shard for bids %s - clients might be under resharding, skip"
        : $shard_id == -1 ? "Can't guess shard for bids %s - no entries in ppcdict, skip"
        :                   undef;

    if ( defined $skip_format ) {
        $log->out( sprintf( $skip_format, join( ', ' => @$chunk_bids ) ) );
        next CHUNK;
    }

    my $hrefs_in_db  = get_data_from_db_for_bids( $shard_id, $chunk_bids );
    my $href_changes = get_sanitized_href_for_bids( $chunk_bids, $hrefs_in_db );

    my $updated_now = update_href_for_bids( $shard_id, $href_changes );
    $log->out( sprintf( '%s banners updated' => $updated_now ) );
    $total_updated_banners += $updated_now;

    # every banner planned for an href change goes to the resync queue
    my @changed_bids = nsort keys %$href_changes;
    resend_to_moderation( \@changed_bids );
}

$log->out( sprintf( 'Total %s banners updated' => $total_updated_banners ) );

$log->out('FINISH');



# Read banner ids from the data file.
#
# The file is expected to contain one numeric banner id per line, sorted in
# strictly ascending order; the first line may be a header. Lines that do not
# look like an id, or whose id is rejected by is_valid_id(), are skipped and
# logged (the first line is skipped silently).
#
# Params:  $path - path to the data file
# Returns: array ref of banner ids in file order
# Dies:    when no path is given, when the file can't be opened, or when the
#          ids are not in strictly ascending order
sub get_data_from_file {
    my ( $path ) = @_;

    # without this check open() fails with a confusing "Can't open :" message
    die "No data file given!\n" unless defined $path && length $path;

    $log->out( 'Load bids and data from file' );

    my $guard = $log->msg_prefix_guard('[get_data_from_file]');

    if ( $VERBOSE ) {
        $log->out({ path => $path });
    }

    my @bids;

    my $previous_bid  = 0;
    my $current_line  = 0;
    my $skipped_lines = 0;

    open my $fh, '<:encoding(UTF-8)', $path or die "Can't open $path: $!\n";

    while ( my $line = <$fh> ) {

        $current_line++;

        chomp $line;
        # tolerate CRLF line endings: otherwise every line of such a file would
        # be rejected below as having an unexpected format
        $line =~ s/\r\z//;

        # [0-9] rather than \d: the handle is decoded as UTF-8, where \d would
        # also accept non-ASCII digits that are not usable as numbers
        if ( $line !~ /\A([0-9]+)\z/ ) {
            if ( $current_line != 1 ) { # skip first line with headers
                $log->out( sprintf( 'line #%s - unexpected format [%s], line skipped' => $current_line, $line ) );
            }

            $skipped_lines++;
            next;
        }

        my $bid = $1;

        if ( $VERBOSE ) {
            $log->out({ line => $current_line, bid => $bid });
        }

        unless ( is_valid_id( $bid ) ) {
            $log->out( sprintf( 'line #%s - incorrect banner id %s, line skipped' => $current_line, $bid ) );
            $skipped_lines++;
            next;
        }

        # several lines with the same bid can't exist - bids are unique, so the
        # order must be strictly ascending
        unless ( $bid > $previous_bid ) {
            die sprintf( "Lines in data file MUST BE sorted by bid in asc order - line #%s, bid %s, previous line bid %s\n" => $current_line, $bid, $previous_bid);
        }

        $previous_bid = $bid;

        push @bids, $bid;
    }

    close $fh;

    if ( $VERBOSE ) {
        $log->out({ bids => \@bids });
    }

    if ( $skipped_lines > 1 ) { # do not log when skipped only first line with headers
        $log->out( sprintf( 'Lines skiped: %s' => $skipped_lines ) );
    }

    return \@bids;
}

# Load the current href of the given banners from one shard.
#
# Params:  $shard - shard number (must be true)
#          $bids  - array ref of banner ids
# Returns: whatever get_hash_sql() yields for "SELECT bid, href" - used by the
#          callers as a hash ref bid => href
# Dies:    when no shard is given
sub get_data_from_db_for_bids {
    my ( $shard, $bids ) = @_;

    unless ( $shard ) {
        die "No shard given!\n";
    }

    $log->out( 'Load current banners data from db' );

    my $guard = $log->msg_prefix_guard('[get_data_from_db_for_bids]');

    $log->out({ shard => $shard, bids => $bids }) if $VERBOSE;

    # NB: banners whose href has meanwhile become NULL or an empty string are
    # filtered out by the length() condition and are therefore not returned
    my %where = (
        bid   => $bids,
        _TEXT => 'length(href) > 0',
    );

    my $href_by_bid = get_hash_sql(
        PPC( shard => $shard ),
        [ 'SELECT bid, href FROM banners', WHERE => \%where ],
    );

    $log->out( $href_by_bid ) if $VERBOSE;

    return $href_by_bid;
}

# Work out which banners need their href replaced.
#
# Params:  $bids         - array ref of banner ids to look at
#          $data_from_db - hash ref bid => current href, as loaded from the DB
# Returns: hash ref bid => { new => sanitized href, old => href from the DB },
#          holding only banners whose href is changed by clear_banner_href()
sub get_sanitized_href_for_bids {
    my ( $bids, $data_from_db ) = @_;

    $log->out( 'Get correct hrefs from banners' );

    my $guard = $log->msg_prefix_guard('[get_sanitized_href_for_bids]');

    $log->out({ bids => $bids }) if $VERBOSE;

    my %href_change_for;

    BID:
    for my $bid ( @$bids ) {

        # banners missing from the DB result have nothing to sanitize
        if ( !exists $data_from_db->{ $bid } ) {
            $log->out( sprintf( 'No href found in db for bid %s (maybe href set to null or empty string?) - skip' => $bid ) );
            next BID;
        }

        my $current_href = $data_from_db->{ $bid };
        my $cleaned_href = clear_banner_href( $current_href );

        if ( $cleaned_href ne $current_href ) {
            $log->out( sprintf( 'Change href [%s] to sanitized href [%s] for bid %s' => $current_href, $cleaned_href, $bid ) );
            $href_change_for{ $bid } = { new => $cleaned_href, old => $current_href };
        }
        else {
            # sanitizing changes nothing, so the banner is left alone
            $log->out( sprintf( 'Sanitized href [%s] equal to href from db [%s] for bid %s - skip' => $cleaned_href, $current_href, $bid ) );
        }
    }

    $log->out({ bid_to_sanitized_href => \%href_change_for }) if $VERBOSE;

    return \%href_change_for;
}

# Write the sanitized hrefs to the banners table of one shard.
#
# Params:  $shard          - shard number (must be true)
#          $data_to_update - hash ref bid => { new => href to set, old => href read earlier }
# Returns: number of rows reported as updated by do_mass_update_sql();
#          0 in dry-run mode or when there is nothing to update
# Dies:    when no shard is given
sub update_href_for_bids {
    my ( $shard, $data_to_update ) = @_;

    die "No shard given!\n" unless $shard;

    $log->out( 'Update banners in db' );

    my $guard = $log->msg_prefix_guard('[update_href_for_bids]');

    if ( $VERBOSE ) {
        $log->out({ data_to_update => $data_to_update });
    }

    my @bids = nsort keys %$data_to_update;

    # Nothing to change: do not build a CASE over an empty set of bids and do
    # not issue (and throttle on) a pointless DB request.
    return 0 unless @bids;

    my %bid_to_new_href;
    my %bid_to_old_href;
    for my $bid ( @bids ) {
        my $bids_data = $data_to_update->{ $bid };
        $bid_to_new_href{ $bid } = { href => $bids_data->{new} };
        $bid_to_old_href{ $bid } = $bids_data->{old};
    }

    my $updated = 0;

    unless ( $DRYRUN ) {
        relaxed times => $SLEEP_COEF, sub {
            # The extra WHERE condition compares href with a CASE over bid that
            # maps every bid to the old href read earlier (defaulting to the
            # href column itself) - presumably so that a banner whose href has
            # changed since it was read is left untouched.
            $updated += do_mass_update_sql( PPC( shard => $shard ), 'banners', 'bid', \%bid_to_new_href,
                where => { href__dont_quote => sql_case( 'bid', \%bid_to_old_href, default__dont_quote => sql_quote_identifier('href') ) });
        };

        if ( scalar @bids != $updated ) {
            $log->out( sprintf( 'Number of updated rows %s not equal to number of rows planned for update %s', $updated, scalar @bids ) );
        }
    }

    return $updated;
}

# Put the given banners into the moderation resync queue.
#
# Params:  $bids - array ref of banner ids
# Returns: nothing
#
# Every banner becomes one queue object built from the script-level $TYPE,
# $PRIORITY and $REMODERATE settings. In dry-run mode the objects are built
# (and logged when verbose) but not passed to mod_resync().
sub resend_to_moderation {
    my ( $bids ) = @_;

    $log->out( 'Resend banners to moderation' );

    my $guard = $log->msg_prefix_guard('[resend_to_moderation]');

    $log->out({ bids => $bids }) if $VERBOSE;

    my @queue_items = map {
        +{
            id         => $_,
            type       => $TYPE,
            priority   => $PRIORITY,
            remoderate => $REMODERATE,
        }
    } @$bids;

    $log->out({ moderate_resync_queue_data => \@queue_items }) if $VERBOSE;

    if ( !$DRYRUN ) {
        Moderate::ResyncQueue::mod_resync( \@queue_items, log => $log );
    }

    return;
}