#!/usr/bin/perl

=encoding UTF-8

=head1 DESCRIPTION

Скрипт для доналивки оставшихся данных и переименования таблиц статистики в ClickHouse

=head1 USAGE

perl ./migrations/before_release/PI-13340_alter_clickhouse_for_statistics.pl --stage=test --password=xxxxxxxx

=head1 OPTIONS

  stage    - DB stage (test / prod)
  password - DB password

=cut

use qbit;

use Pod::Usage;
use Getopt::Long qw();

# Parses the command-line options of the script.
#
# Recognised options:
#   --stage=NAME      DB stage; defaults to 'test' (the value is validated by the caller)
#   --password=VALUE  DB password; required
#   --help, -h, -?    print the full POD documentation and exit
#
# Returns the list ($stage, $password).
# Exits through pod2usage() on unknown options, on --help, or when --password is missing.
sub _get_args {
    my $password;
    my $help  = 0;
    my $stage = 'test';

    Getopt::Long::GetOptions(
        'stage=s'    => \$stage,
        'password=s' => \$password,
        'help|?|h'   => \$help,
    ) or pod2usage(1);

    # Help has to be handled before the required-option check, otherwise
    # "--help" without "--password" ends in the error path instead of showing the documentation.
    pod2usage(-verbose => 2, -noperldoc => 1) if $help;

    pod2usage(-message => 'Option --password is required', -exitval => 1) if !defined $password;

    return ($stage, $password);
}

# Quotes a value for /bin/sh so that it reaches the invoked program verbatim
# (spaces, quotes, backticks, '$' and similar characters are not interpreted by the shell).
sub _shell_quote {
    my ($value) = @_;

    $value =~ s/'/'\\''/g;

    return "'$value'";
}

# Builds the clickhouse-client command line for one replica host.
# NOTE(review): the password is still passed as a command-line argument and is
# therefore visible in the process list while a query is running.
sub _ch_client_command {
    my ($host, $password) = @_;

    return join(' ',
        'clickhouse-client', '--ssl', "--host=$host", '--port=9440', '--user=partner',
        '--password=' . _shell_quote($password),
        '--database=partner');
}

# Runs a query through the given clickhouse-client command line.
# $redirect is an optional shell redirection (e.g. "> file" or "< file").
# Returns the list (captured stdout, wait status from $?); never dies.
sub _ch_try_query {
    my ($conn, $query, $redirect) = @_;

    my $command = "$conn --query=" . _shell_quote($query);
    $command .= " $redirect" if defined $redirect;

    my $output = `$command`;
    my $status = $?;
    $output = '' unless defined $output;

    return ($output, $status);
}

# Same as _ch_try_query, but dies when the client exits with a non-zero status.
# The error message contains the query only, never the connection string (it holds the password).
sub _ch_query {
    my ($conn, $query, $redirect) = @_;

    my ($output, $status) = _ch_try_query($conn, $query, $redirect);
    die "ClickHouse query failed (wait status $status): $query\n" if $status != 0;

    return $output;
}

# Runs a query that returns a single value and returns it without the trailing newline.
sub _ch_value {
    my ($conn, $query) = @_;

    my $value = _ch_query($conn, $query);
    chomp($value);

    return $value;
}

# Fetches one row of aggregates from partner.$table and returns it as a list.
# Dies when the number of returned values does not match the number of aggregates,
# so that a broken answer can never be reported as "everything is equal".
sub _ch_totals {
    my ($conn, $table, $aggregates) = @_;

    my $row = _ch_value($conn, 'select ' . join(', ', @$aggregates) . " from partner.$table FORMAT TabSeparated");
    my @totals = split "\t", $row;

    die sprintf("Unexpected answer for totals of partner.%s: got %d values, expected %d\n",
        $table, scalar(@totals), scalar(@$aggregates))
      if @totals != @$aggregates;

    return @totals;
}

my $TEMP_DIR = '/tmp/clickhouse_tmp_data';
my ($stage, $password) = _get_args();

print "#START#\n";

# Replica hosts per stage. The cluster names are kept for reference only:
# the script does not use ON CLUSTER statements (see the note at the rename step).
my %CH_STAGES = (
    test => {
        hosts   => ['man-1t71zs5e3qalyfvq.db.yandex.net', 'sas-h69utjbj38jrgroj.db.yandex.net'],
        cluster => 'partner_prestable_cluster',
    },
    prod => {
        hosts   => ['sas-mw8x8a7kp009dvql.db.yandex.net', 'man-whecdgrou6tfa07o.db.yandex.net'],
        cluster => 'partner_production_cluster',
    },
);

my $stage_config = $CH_STAGES{$stage}
  or die "You need correctly set STAGE value before running script...";

my ($ch_conn_1, $ch_conn_2) = map { _ch_client_command($_, $password) } @{$stage_config->{hosts}};

# The directory may be left over from an interrupted run, which is fine.
unless (-d $TEMP_DIR) {
    mkdir($TEMP_DIR) or die "Cannot create temporary directory $TEMP_DIR: $!\n";
}

my $OLD_TABLE_NAME    = 'statistics';
my $NEW_TABLE_NAME    = 'statistics_tmp';
my $BACKUP_TABLE_NAME = 'statistics_old';

# Single source of truth for the column lists used by the export, the insert and the verification.
my @KEY_COLUMNS = qw(dt product_id page_id block_id dsp_id currency_id tag_id category_id);
my @METRIC_COLUMNS = qw(
  shows clicks hits direct_clicks direct_shows
  all_w_nds all_wo_nds partner_w_nds partner_wo_nds
  bad_shows bad_hits
  bad_win_price_w_nds bad_win_price_wo_nds bad_win_partner_price_w_nds bad_win_partner_price_wo_nds
  win_max_positions_count
  an_fraud_shows an_fraud_clicks an_fraud_hits
  an_cover_hits an_cover_direct_hits an_cover_market_hits an_cover_mcb_hits an_cover_senthits
  an_rtb_cover_hits an_rtb_cover_senthits an_rtb_cover_direct_hits
  direct_page_ad_shows premium_page_ad_shows
  direct_hits market_hits mcb_hits premium_hits
  view open_player
);
my @ALL_COLUMNS = (@KEY_COLUMNS, @METRIC_COLUMNS);

# Step 1. Copy the most recent rows (everything newer than what the new table already has).
my $table_from = $OLD_TABLE_NAME;
my $table_to   = $NEW_TABLE_NAME;

my $start_date = _ch_value($ch_conn_1, "SELECT MAX(dt) from $table_to FORMAT TabSeparated");

# An empty or malformed value would turn the filter below into "copy everything again".
# NOTE(review): assumes dt is rendered as digits with '-', ':' and spaces (Date/DateTime) - confirm.
die "Unexpected MAX(dt) value for $table_to: '$start_date'\n"
  unless $start_date =~ /\A[0-9][0-9:\- ]*\z/;

my $rows = _ch_value($ch_conn_1, "SELECT count(*) from $table_from where dt>'$start_date' FORMAT TabSeparated");
printf("%s Start date: %s\n", curdate(oformat => 'db_time'), $start_date);

my $data_file = "$TEMP_DIR/data.tsv";

my $export_sql = sprintf(
    q{SELECT %s FROM `partner`.`%s` WHERE `dt` > '%s' FORMAT TabSeparated},
    join(', ', map {"`$_`"} @ALL_COLUMNS),
    $table_from, $start_date
);
_ch_query($ch_conn_1, $export_sql, "> $data_file");

printf("%s Rows exported: %s\n", curdate(oformat => 'db_time'), $rows);

# A failed insert stops the script here, before any table is renamed.
my $insert_sql = sprintf('INSERT INTO %s (%s) FORMAT TabSeparated', $table_to, join(',', @ALL_COLUMNS));
print _ch_query($ch_conn_1, $insert_sql, "< $data_file");

print "Rows inserted.\nRemove temporary file $data_file\n\n";

unlink($data_file) or warn "Cannot remove $data_file: $!\n";

# Step 2. Swap the tables.
# RENAME TABLE ... ON CLUSTER ... takes long and the client drops off by timeout,
# so the tables are renamed on every replica separately.
foreach my $conn ($ch_conn_1, $ch_conn_2) {
    print _ch_query($conn, "RENAME TABLE partner.$table_from TO partner.$BACKUP_TABLE_NAME");
    print _ch_query($conn, "RENAME TABLE partner.$table_to TO partner.$table_from");
}

# From here on the original data lives in the backup table and the new data under the old name.
my $backup_table = $BACKUP_TABLE_NAME;
my $live_table   = $OLD_TABLE_NAME;

# A failed OPTIMIZE is only reported: the swap is already done and verification can still run.
foreach my $conn ($ch_conn_1, $ch_conn_2) {
    my ($optimize_output, $optimize_status) = _ch_try_query($conn, "optimize table $live_table");
    print $optimize_output;
    warn "optimize table $live_table failed (wait status $optimize_status)\n" if $optimize_status != 0;
}

# Step 3. Verify the data: compare the row count and per-column sums of both tables.
print "Checking values in tables...\n";

my @field_list = ('count', @METRIC_COLUMNS);
my @aggregates = ('count()', map {"sum($_)"} @METRIC_COLUMNS);

printf "%s Old table...\n", curdate(oformat => 'db_time');
my @values_before = _ch_totals($ch_conn_1, $backup_table, \@aggregates);

printf "%s New table...\n", curdate(oformat => 'db_time');
my @values_after = _ch_totals($ch_conn_1, $live_table, \@aggregates);

printf "%s Comparing values:\n", curdate(oformat => 'db_time');
my $all_ok = TRUE;
foreach my $i (0 .. $#field_list) {
    my ($field, $before, $after) = ($field_list[$i], $values_before[$i], $values_after[$i]);

    if ($after == $before) {
        print "SUM of fields $field equals\n";
    } else {
        print "SUM of fields $field differ. Before:$before. After:$after.\n";

        # A different row count alone is reported but does not fail the check
        # (presumably the new table stores the same data with a different row granularity - confirm).
        $all_ok = FALSE unless $field eq 'count';
    }
}

if ($all_ok) {
    print "OK! After table transform summ of all fields in old and new tables equals!\n";
} else {
    print "Attention! After table transform summ of something fields in old and new tables differ!\n";
}

rmdir($TEMP_DIR) or warn "Cannot remove temporary directory $TEMP_DIR: $!\n";
print "#END\n";
