#!/usr/bin/perl

=encoding UTF-8

=head1 DESCRIPTION

Скрипт для изменения таблицы статистики и переналивки данных

=head1 USAGE

perl ./migrations/before_release/PI-13340_alter_clickhouse_for_statistics_insert_data_into_new_table.pl --stage=test --password=xxxxxxxx

=head1 OPTIONS

  stage    - DB stage (test / prod)
  password - DB password

=cut

use qbit;

use Pod::Usage;
use Getopt::Long qw();

# Parses the command-line options of the script.
#
# Recognised options:
#   --stage=<name>     DB stage, defaults to 'test'
#   --password=<pwd>   DB password, required
#   --help | -h | -?   print the full POD and exit
#
# Returns the list ($stage, $password).
# Does not return (pod2usage exits the process) when the options cannot be
# parsed, when help is requested, or when no password is given.
sub _get_args {
    my $password;
    my $help  = 0;
    my $stage = 'test';

    Getopt::Long::GetOptions(
        'stage=s'    => \$stage,
        'password=s' => \$password,
        'help|?|h'   => \$help,
    ) or pod2usage(1);

    # Help has to be handled before the mandatory-option check, otherwise
    # "--help" without "--password" only ever prints the short usage.
    pod2usage(-verbose => 2, -noperldoc => 1) if $help;

    pod2usage(1) if !defined $password;

    return ($stage, $password);
}

# Scratch directory for the generated SQL file and the exported TSV batches.
my $TEMP_DIR = '/tmp/clickhouse_tmp_data';
my ($stage, $password) = _get_args;

print "#START#\n";

# mkdir also fails when the directory is left over from a previous run, so a
# failure is fatal only if the directory does not exist afterwards.
unless (mkdir $TEMP_DIR) {
    my $mkdir_error = "$!";
    die "Cannot create directory $TEMP_DIR: $mkdir_error" unless -d $TEMP_DIR;
}

# The connection strings below are pasted into shell command lines (backticks
# and system), so the password is wrapped in single quotes for the shell;
# single quotes inside the password are rewritten as '\''.
(my $shell_password = $password) =~ s/'/'\\''/g;
$shell_password = "'$shell_password'";

# Everything except the host is identical for all four connections.
my $CH_CLIENT_TEMPLATE =
  'clickhouse-client --ssl --host=%s --port=9440 --user=partner --password=%s --database=partner';

my $CH_CONN_PRESTABLE_1 = sprintf($CH_CLIENT_TEMPLATE, 'man-1t71zs5e3qalyfvq.db.yandex.net', $shell_password);
my $CH_CONN_PRESTABLE_2 = sprintf($CH_CLIENT_TEMPLATE, 'sas-h69utjbj38jrgroj.db.yandex.net', $shell_password);
my $CH_PRESTABLE_CLUSTER = 'partner_prestable_cluster';

my $CH_CONN_PRODUCTION_1 = sprintf($CH_CLIENT_TEMPLATE, 'sas-mw8x8a7kp009dvql.db.yandex.net', $shell_password);
my $CH_CONN_PRODUCTION_2 = sprintf($CH_CLIENT_TEMPLATE, 'man-whecdgrou6tfa07o.db.yandex.net', $shell_password);
my $CH_PRODUCTION_CLUSTER = 'partner_production_cluster';

# Source table and the table the data is copied into.
my $OLD_TABLE_NAME = 'statistics';
my $NEW_TABLE_NAME = 'statistics_tmp';

# Client command lines for the two hosts and the cluster name of the chosen stage.
my $ch_conn_1;
my $ch_conn_2;
my $cluster;

if ($stage eq 'test') {
    $ch_conn_1 = $CH_CONN_PRESTABLE_1;
    $ch_conn_2 = $CH_CONN_PRESTABLE_2;
    $cluster   = $CH_PRESTABLE_CLUSTER;
} elsif ($stage eq 'prod') {
    $ch_conn_1 = $CH_CONN_PRODUCTION_1;
    $ch_conn_2 = $CH_CONN_PRODUCTION_2;
    $cluster   = $CH_PRODUCTION_CLUSTER;
} else {
    die "You need correctly set STAGE value before running script...";
}

# DDL for the target table `partner`.`$NEW_TABLE_NAME`, issued ON CLUSTER for
# the cluster of the selected stage. IF NOT EXISTS lets the statement be sent
# again on a rerun of the script.
# Compared with the column list copied from the old table further down, this
# definition additionally has `monetizer_id`, which is also part of the key
# tuple passed to the engine.
# The text is interpolated by qq[], so $NEW_TABLE_NAME and $cluster must be set
# before this statement runs.
my $CREATE_TMP_TABLE_SQL = qq[
    CREATE TABLE IF NOT EXISTS `partner`.`$NEW_TABLE_NAME` ON CLUSTER $cluster (
    `dt` Date,
    `product_id` Enum8('context_on_site_adblock' = 1,
                    'context_on_site_direct' = 2,
                    'context_on_site_market' = 3,
                    'context_on_site_market_api' = 4,
                    'context_on_site_mcb' = 5,
                    'context_on_site_rtb' = 6,
                    'context_on_site_stripe' = 7,
                    'dsp' = 8,
                    'internal_context_on_site_direct' = 9,
                    'internal_context_on_site_rtb' = 10,
                    'internal_context_on_site_stripe' = 11,
                    'internal_mobile_app_rtb' = 12,
                    'internal_search_on_site_direct' = 13,
                    'internal_search_on_site_premium' = 14,
                    'mobile_app_rtb' = 15,
                    'search_on_site_direct' = 16,
                    'search_on_site_market' = 17,
                    'search_on_site_market_api' = 18,
                    'search_on_site_mcb' = 19,
                    'search_on_site_premium' = 20,
                    'ssp_context_on_site_rtb' = 21,
                    'ssp_mobile_app_rtb' = 22,
                    'ssp_video_an_site_rtb' = 23,
                    'video_an_site_fullscreen' = 24,
                    'video_an_site_inpage' = 25,
                    'video_an_site_instream' = 26,
                    'context_on_site_adfox' = 27,
                    'context_on_site_campaign' = 28,
                    'search_on_site_campaign' = 29,
                    'internal_context_on_site_campaign' = 30,
                    'internal_search_on_site_campaign' = 31,
                    'mobile_app_settings' = 32,
                    'video_an_site' = 33,
                    'ssp_context_on_site_campaign' = 34,
                    'ssp_mobile_app_settings' = 35,
                    'ssp_video_an_site' = 36,
                    'context_on_site_content' = 37,
                    'mobile_mediation_block' = 38),
    `page_id` UInt32,
    `block_id` UInt32,
    `dsp_id` UInt32,
    `currency_id` UInt8,
    `tag_id` UInt32,
    `category_id` UInt32,
    `monetizer_id` UInt32,
    `shows` Int64,
    `clicks` Int64,
    `hits` Int64,
    `direct_clicks` Int64,
    `direct_shows` Int64,
    `all_w_nds` Int64,
    `all_wo_nds` Int64,
    `partner_w_nds` Int64,
    `partner_wo_nds` Int64,
    `bad_shows` Int64,
    `bad_hits` Int64,
    `bad_win_price_w_nds` Int64,
    `bad_win_price_wo_nds` Int64,
    `bad_win_partner_price_w_nds` Int64,
    `bad_win_partner_price_wo_nds` Int64,
    `win_max_positions_count` Int64,
    `an_fraud_shows` Int64,
    `an_fraud_clicks` Int64,
    `an_fraud_hits` Int64,
    `an_cover_hits` Int64,
    `an_cover_direct_hits` Int64,
    `an_cover_market_hits` Int64,
    `an_cover_mcb_hits` Int64,
    `an_cover_senthits` Int64,
    `an_rtb_cover_hits` Int64,
    `an_rtb_cover_senthits` Int64,
    `an_rtb_cover_direct_hits` Int64,
    `direct_page_ad_shows` Int64,
    `premium_page_ad_shows` Int64,
    `direct_hits` Int64,
    `market_hits` Int64,
    `mcb_hits` Int64,
    `premium_hits` Int64,
    `view` Int64,
    `open_player` Int64
    )
    ENGINE = ReplicatedSummingMergeTree('/statistics_pi2', '{replica}', dt, (dt, product_id, page_id, block_id, dsp_id, currency_id, tag_id, category_id, monetizer_id), 8192);];

# The most recent 60 days are not transferred.
my $end_date = curdate(oformat => 'db');
my $stop_date = date_sub($end_date, day => 60, iformat => 'db', oformat => 'db');

# Create the target table (the statement is IF NOT EXISTS, so a rerun is fine).
writefile("$TEMP_DIR/SQL", $CREATE_TMP_TABLE_SQL);
my $create_output = `cat $TEMP_DIR/SQL | $ch_conn_1`;
my $create_status = $?;
# Remove the statement file right away so that the final rmdir of $TEMP_DIR can
# succeed even when no batch is transferred afterwards.
unlink "$TEMP_DIR/SQL";
print $create_output;
die "Creating table $NEW_TABLE_NAME failed (exit status $create_status)" if $create_status != 0;

my $table_from = $OLD_TABLE_NAME;
my $table_to   = $NEW_TABLE_NAME;

my $exists_rows = `echo "SELECT count() from $table_to FORMAT TabSeparated" | $ch_conn_1`;
my $count_status = $?;
chomp($exists_rows);

# A failed query yields an empty string, which would otherwise be treated as
# "target table is empty" and restart the copy from the very first date on top
# of data that may already be there.
die "Cannot count rows in $table_to (exit status $count_status, output '$exists_rows')"
  unless $count_status == 0 && $exists_rows =~ /\A\d+\z/;

my $start_date;
# Find the first date whose data has not been transferred yet.
if ($exists_rows > 0) {
    # Resume after the latest date already present in the target table.
    $start_date = `echo "SELECT MAX(dt) from $table_to FORMAT TabSeparated" | $ch_conn_1`;
    chomp($start_date);
    die "Unexpected MAX(dt) of $table_to: '$start_date'" unless $start_date =~ /\A\d{4}-\d{2}-\d{2}\z/;
    $start_date = date_add($start_date, day => 1, iformat => 'db', oformat => 'db');
} else {
    # Nothing transferred so far: start from the earliest date of the source table.
    $start_date = `echo "SELECT MIN(dt) from $table_from FORMAT TabSeparated" | $ch_conn_1`;
    chomp($start_date);
    die "Unexpected MIN(dt) of $table_from: '$start_date'" unless $start_date =~ /\A\d{4}-\d{2}-\d{2}\z/;
}
print "\n== $start_date ==\n";
# Upper bound of the first batch.
$end_date = date_add($start_date, day => 10, iformat => 'db', oformat => 'db');

# Upper bound of the last batch that was transferred in this run; stays
# undefined when the loop below does not execute at all.
my $check_date;

# Columns copied from the old table. One list drives both the SELECT and the
# INSERT, so the order of the exported TabSeparated fields always matches the
# INSERT column list.
my @transfer_columns = qw(
  dt product_id page_id block_id dsp_id currency_id tag_id category_id
  shows clicks hits direct_clicks direct_shows
  all_w_nds all_wo_nds partner_w_nds partner_wo_nds
  bad_shows bad_hits
  bad_win_price_w_nds bad_win_price_wo_nds
  bad_win_partner_price_w_nds bad_win_partner_price_wo_nds
  win_max_positions_count
  an_fraud_shows an_fraud_clicks an_fraud_hits
  an_cover_hits an_cover_direct_hits an_cover_market_hits an_cover_mcb_hits an_cover_senthits
  an_rtb_cover_hits an_rtb_cover_senthits an_rtb_cover_direct_hits
  direct_page_ad_shows premium_page_ad_shows
  direct_hits market_hits mcb_hits premium_hits
  view open_player
);
my $select_columns = join(",\n            ", map { sprintf('`%s`', $_) } @transfer_columns);
my $insert_columns = join(',', @transfer_columns);

# Transfer the data in batches. A batch covers $start_date through $end_date,
# both bounds inclusive, where $end_date is $start_date plus ten days. The loop
# stops as soon as a batch would reach past $stop_date.
while ($stop_date ge $end_date) {

    # Row count of the batch in the source table; used only for the log line below.
    my $rows =
`echo "SELECT count(*) from $table_from where dt>='$start_date' and dt <= '$end_date' FORMAT TabSeparated" | $ch_conn_1`;
    chomp($rows);
    printf("%s Period: %s - %s\n", curdate(oformat => 'db_time'), $start_date, $end_date);

    my $sql = qq{
        SELECT
            $select_columns
        FROM `partner`.`$table_from`
        WHERE
            `dt` >= '$start_date' and `dt` <= '$end_date'
        FORMAT TabSeparated
    };

    writefile("$TEMP_DIR/SQL", $sql);

    my $export_status = system "cat $TEMP_DIR/SQL | $ch_conn_1 > $TEMP_DIR/data.tsv";

    unlink "$TEMP_DIR/SQL";

    # Stop instead of inserting an empty or truncated export: the resume logic
    # continues after the latest date in the target table, so a silently
    # skipped batch would never be transferred by a rerun.
    die "Export of period $start_date - $end_date failed (status $export_status)" if $export_status != 0;

    printf("%s Rows exported: %s\n", curdate(oformat => 'db_time'), $rows);

    my $insert_output =
`$ch_conn_1 --query="INSERT INTO $table_to ($insert_columns) FORMAT TabSeparated " < $TEMP_DIR/data.tsv`;
    my $insert_status = $?;
    print $insert_output;
    die "Insert of period $start_date - $end_date failed (exit status $insert_status); check $table_to before rerunning"
      if $insert_status != 0;

    print "Rows inserted.\nRemove temporary file $TEMP_DIR/data.tsv\n\n";

    unlink "$TEMP_DIR/data.tsv";

    # Remember how far the transfer got and move on to the next batch.
    $check_date = $end_date;
    $start_date = date_add($end_date, day => 1, iformat => 'db', oformat => 'db');
    $end_date   = date_add($start_date, day => 10, iformat => 'db', oformat => 'db');
}

# Send OPTIMIZE for the target table through each of the two connections, in
# order, echoing whatever the client prints.
for my $ch_conn ($ch_conn_1, $ch_conn_2) {
    my $optimize_output = `echo "optimize table $table_to" | $ch_conn`;
    print $optimize_output;
}

# Verify the transfer: compare the row count and the per-column sums of the old
# and the new table for all dates up to $check_date.
print "Checking values in tables...\n";

# $check_date is only set inside the transfer loop. When no batch was
# transferred in this run (e.g. a rerun after everything has been copied),
# verify up to the latest date present in the new table instead of sending a
# query with an empty date.
unless (defined $check_date) {
    $check_date = `echo "SELECT MAX(dt) from $table_to FORMAT TabSeparated" | $ch_conn_1`;
    chomp($check_date);
    die "Cannot determine the date to verify up to (got '$check_date')"
      unless $check_date =~ /\A\d{4}-\d{2}-\d{2}\z/;
}

# Columns whose sums are compared. The report labels and the query are both
# derived from this list, so they cannot get out of step.
my @sum_fields = qw(
  shows clicks hits direct_clicks direct_shows
  all_w_nds all_wo_nds partner_w_nds partner_wo_nds
  bad_shows bad_hits
  bad_win_price_w_nds bad_win_price_wo_nds
  bad_win_partner_price_w_nds bad_win_partner_price_wo_nds
  win_max_positions_count
  an_fraud_shows an_fraud_clicks an_fraud_hits
  an_cover_hits an_cover_direct_hits an_cover_market_hits an_cover_mcb_hits an_cover_senthits
  an_rtb_cover_hits an_rtb_cover_senthits an_rtb_cover_direct_hits
  direct_page_ad_shows premium_page_ad_shows
  direct_hits market_hits mcb_hits premium_hits
  view open_player
);
my @field_list = ('count', @sum_fields);

my $select_totals = join(', ', 'count()', map { sprintf('sum(%s)', $_) } @sum_fields);

# Runs the totals query against one table and returns one value per entry of
# @field_list. Dies when the query fails or returns an unexpected number of
# values, because comparing two empty results would report a false "OK".
my $fetch_totals = sub {
    my ($table) = @_;

    my $output =
`echo "select $select_totals from partner.$table where dt <= '$check_date' FORMAT TabSeparated" | $ch_conn_1`;
    my $status = $?;
    chomp $output;

    die "Totals query for table $table failed (exit status $status)" if $status != 0;

    my @totals = split "\t", $output;
    die sprintf("Totals query for table %s returned %d values, expected %d", $table, scalar(@totals), scalar(@field_list))
      if @totals != @field_list;

    return @totals;
};

printf "%s Old table...\n", curdate(oformat => 'db_time');
my @values_before = $fetch_totals->($table_from);

printf "%s New table...\n", curdate(oformat => 'db_time');
my @values_after = $fetch_totals->($table_to);

printf "%s Comparing values:\n", curdate(oformat => 'db_time');
my $all_ok = TRUE;
# Compare the totals field by field and print a summary.
for my $i (0 .. $#field_list) {
    my $field  = $field_list[$i];
    my $before = $values_before[$i];
    my $after  = $values_after[$i];

    if ($after == $before) {
        print "SUM of fields $field equals\n";
    } else {
        print "SUM of fields $field differ. Before:$before. After:$after.\n";
        # A different row count alone does not fail the check; only the sums do.
        $all_ok = FALSE unless $field eq 'count';
    }
}

# The trailing newline keeps the "#END" marker below on a line of its own.
if ($all_ok) {
    print "OK! After table transform summ of all fields in old and new tables equals!\n";
} else {
    print "Attention! After table transform summ of something fields in old and new tables differ!\n";
}

rmdir $TEMP_DIR;
print "#END\n";
