#!/usr/bin/perl

=encoding UTF-8

=head1 DESCRIPTION

Скрипт для изменения таблицы статистики и переналивки данных

=head1 USAGE

perl -I./lib ./migrations/before_release/PI-18568_alter_clickhouse_table_for_new_currency.pl --stage=ts --date_to=2020-01-01

=head1 OPTIONS

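  stage        - DB stage (ts / prod); required
  date_to      - last date to copy, inclusive, as YYYY-MM-DD; defaults to the current date
  create_table - create the temporary table before copying (off by default)
  rename_table - after a successful check, rename the old table to a backup and the new table to the old name (off by default)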

=cut

use qbit;
use Utils::ScriptWrapper;

# Scratch directory holding the temporary SQL file that is piped into the clickhouse_mdb command.
my $TEMP_DIR = '/tmp/clickhouse_tmp_data';

# Describes the command-line options of this script.
# Takes the options hash reference and returns a flat list of
# (option specification => reference to the slot in that hash) pairs.
sub args {
    my $opts = shift;

    my $stage_ref        = \$opts->{'stage'};
    my $date_to_ref      = \$opts->{'date_to'};
    my $create_table_ref = \$opts->{'create_table'};
    my $rename_table_ref = \$opts->{'rename_table'};

    return (
        'stage:s'       => $stage_ref,
        'date_to:s'     => $date_to_ref,
        'create_table!' => $create_table_ref,
        'rename_table!' => $rename_table_ref,
    );
}

# Main script body:
#   0. (optional, --create_table) create the temporary table with the new schema;
#   1. find the first date that has not been copied yet;
#   2. copy the data from the old table to the new one in date batches up to the stop date;
#   3. compare count() and per-column sums of both tables;
#   4. (optional, --rename_table) if the sums match, rename old -> backup and new -> old.
# Dies on an invalid --stage / --date_to, on unexpected output of the helper queries
# and on a failed INSERT batch.
run(
    sub {
        my ($app, $opts) = @_;
        print logstr "#START#";

        # Validate user input first: both values are interpolated into shell commands below.
        my %cluster_for = (
            ts   => 'partner_prestable_cluster',
            prod => 'partner_production_cluster',
        );
        my $stage = defined $opts->{stage} ? $opts->{stage} : '';
        my $cluster = $cluster_for{$stage};
        die "You need correctly set STAGE value before running script..." unless $cluster;

        my $date_re = qr/\A\d{4}-\d{2}-\d{2}\z/;
        if ($opts->{date_to} && $opts->{date_to} !~ $date_re) {
            die "date_to must look like YYYY-MM-DD, got '$opts->{date_to}'";
        }

        unless (-d $TEMP_DIR) {
            mkdir $TEMP_DIR or die "Cannot create $TEMP_DIR: $!";
        }
        my $sql_file = "$TEMP_DIR/SQL";

        my $OLD_TABLE_NAME    = 'statistics';
        my $NEW_TABLE_NAME    = 'statistics_tmp';
        my $BACKUP_TABLE_NAME = 'statistics_old';

        my $ch_client = "clickhouse_mdb --server=$stage";

        # Single source of truth for the columns that are copied and verified.
        my @key_columns = qw(dt product_id page_id block_id dsp_id currency_id tag_id category_id monetizer_id);
        my @metric_columns = qw(
          shows clicks hits impressions direct_clicks direct_shows
          all_w_nds all_wo_nds partner_w_nds partner_wo_nds
          bad_shows bad_hits bad_win_price_w_nds bad_win_price_wo_nds
          bad_win_partner_price_w_nds bad_win_partner_price_wo_nds
          win_max_positions_count
          an_fraud_shows an_fraud_clicks an_fraud_hits
          an_cover_hits an_cover_direct_hits an_cover_market_hits an_cover_mcb_hits an_cover_senthits
          an_rtb_cover_hits an_rtb_cover_senthits an_rtb_cover_direct_hits
          direct_page_ad_shows premium_page_ad_shows
          direct_hits market_hits mcb_hits premium_hits
          view open_player
          );

        # Runs a one-line query through the shell and returns its stdout with all newlines removed.
        # The query must not contain double quotes, backticks or '$'.
        my $query_line = sub {
            my ($query) = @_;
            my $out = `echo "$query" | $ch_client`;
            $out = '' unless defined $out;
            $out =~ s/\n//g;
            return $out;
        };

        my $CREATE_TMP_TABLE_SQL = qq[
    CREATE TABLE IF NOT EXISTS `partner`.`$NEW_TABLE_NAME` ON CLUSTER $cluster (
    `dt` Date,
    `product_id` Enum8('context_on_site_adblock' = 1,
        'context_on_site_direct' = 2,
        'context_on_site_market' = 3,
        'context_on_site_market_api' = 4,
        'context_on_site_mcb' = 5,
        'context_on_site_rtb' = 6,
        'context_on_site_stripe' = 7,
        'dsp' = 8,
        'internal_context_on_site_direct' = 9,
        'internal_context_on_site_rtb' = 10,
        'internal_context_on_site_stripe' = 11,
        'internal_mobile_app_rtb' = 12,
        'internal_search_on_site_direct' = 13,
        'internal_search_on_site_premium' = 14,
        'mobile_app_rtb' = 15,
        'search_on_site_direct' = 16,
        'search_on_site_market' = 17,
        'search_on_site_market_api' = 18,
        'search_on_site_mcb' = 19,
        'search_on_site_premium' = 20,
        'ssp_context_on_site_rtb' = 21,
        'ssp_mobile_app_rtb' = 22,
        'ssp_video_an_site_rtb' = 23,
        'video_an_site_fullscreen' = 24,
        'video_an_site_inpage' = 25,
        'video_an_site_instream' = 26,
        'context_on_site_adfox' = 27,
        'context_on_site_campaign' = 28,
        'search_on_site_campaign' = 29,
        'internal_context_on_site_campaign' = 30,
        'internal_search_on_site_campaign' = 31,
        'mobile_app_settings' = 32,
        'video_an_site' = 33,
        'ssp_context_on_site_campaign' = 34,
        'ssp_mobile_app_settings' = 35,
        'ssp_video_an_site' = 36,
        'context_on_site_content' = 37,
        'mobile_mediation_block' = 38,
        'internal_context_on_site_content' = 39,
        'indoor' = 40,
        'indoor_block' = 41,
        'outdoor' = 42,
        'outdoor_block' = 43,
        'context_on_site_natural' = 44,
        'internal_context_on_site_natural' = 45),
    `page_id` UInt32,
    `block_id` UInt32,
    `dsp_id` UInt32,
    `currency_id` UInt16,
    `tag_id` UInt32,
    `category_id` UInt32,
    `monetizer_id` UInt32,
    `shows` Int64,
    `clicks` Int64,
    `hits` Int64,
    `impressions` Int64,
    `direct_clicks` Int64,
    `direct_shows` Int64,
    `all_w_nds` Int64,
    `all_wo_nds` Int64,
    `partner_w_nds` Int64,
    `partner_wo_nds` Int64,
    `bad_shows` Int64,
    `bad_hits` Int64,
    `bad_win_price_w_nds` Int64,
    `bad_win_price_wo_nds` Int64,
    `bad_win_partner_price_w_nds` Int64,
    `bad_win_partner_price_wo_nds` Int64,
    `win_max_positions_count` Int64,
    `an_fraud_shows` Int64,
    `an_fraud_clicks` Int64,
    `an_fraud_hits` Int64,
    `an_cover_hits` Int64,
    `an_cover_direct_hits` Int64,
    `an_cover_market_hits` Int64,
    `an_cover_mcb_hits` Int64,
    `an_cover_senthits` Int64,
    `an_rtb_cover_hits` Int64,
    `an_rtb_cover_senthits` Int64,
    `an_rtb_cover_direct_hits` Int64,
    `direct_page_ad_shows` Int64,
    `premium_page_ad_shows` Int64,
    `direct_hits` Int64,
    `market_hits` Int64,
    `mcb_hits` Int64,
    `premium_hits` Int64,
    `view` Int64,
    `open_player` Int64,
    `calculated_revenue` Int64,
    `calculated_revenue_original` Int64
    ) ENGINE = ReplicatedSummingMergeTree('/statistics_pi_2', '{replica}', dt, (dt, product_id, page_id, block_id, dsp_id, currency_id, tag_id, category_id, monetizer_id), 8192);];

        # Copy up to --date_to (inclusive) when it is given, otherwise up to the current date.
        my $stop_date = $opts->{date_to} ? $opts->{date_to} : curdate(oformat => 'db');

        if ($opts->{create_table}) {
            print logstr "stage 0 create table";
            writefile($sql_file, $CREATE_TMP_TABLE_SQL);
            print `cat $sql_file | $ch_client`;
            unlink $sql_file;
        }

        print logstr "stage 1";
        my $table_from = $OLD_TABLE_NAME;
        my $table_to   = $NEW_TABLE_NAME;

        my $exists_rows = $query_line->("SELECT count() from partner.$table_to FORMAT TabSeparated");
        # An empty or non-numeric answer means the query failed; do not guess the starting point.
        die "Unexpected row count for partner.$table_to: '$exists_rows'" unless $exists_rows =~ /\A\d+\z/;

        print logstr "stage 2";
        print logstr $exists_rows;

        # Find the first date for which the data has not been copied yet.
        my $start_date;
        if ($exists_rows > 0) {
            # Resume right after the last date already present in the new table.
            my $last_copied = $query_line->("SELECT MAX(dt) from partner.$table_to FORMAT TabSeparated");
            die "Unexpected MAX(dt) for partner.$table_to: '$last_copied'" unless $last_copied =~ $date_re;
            $start_date = date_add($last_copied, day => 1, iformat => 'db', oformat => 'db');
        } else {
            print logstr "echo \"SELECT MIN(dt) from partner.$table_from FORMAT TabSeparated\" | $ch_client";
            $start_date = $query_line->("SELECT MIN(dt) from partner.$table_from FORMAT TabSeparated");
            die "Unexpected MIN(dt) for partner.$table_from: '$start_date'" unless $start_date =~ $date_re;
        }
        print logstr "== $start_date ==";

        my $end_date = date_add($start_date, day => 10, iformat => 'db', oformat => 'db');
        $end_date = $stop_date if ($stop_date le $end_date);

        my $insert_columns = join(',', @key_columns, @metric_columns);
        my $select_columns = join(",\n    ", map { sprintf('`%s`', $_) } @key_columns, @metric_columns);

        my $check_date;
        # Copy the data in batches: each batch covers $start_date .. $start_date + 10 days (inclusive),
        # the last one is cut at $stop_date.
        do {
            my $rows = $query_line->(
"SELECT count(*) from partner.$table_from where dt>='$start_date' and dt <= '$end_date' FORMAT TabSeparated"
            );
            print logstr sprintf("Period: %s - %s", $start_date, $end_date);
            print logstr "Rows in source table for period: $rows";

            my $sql = qq{
INSERT INTO `partner`.`$table_to` ($insert_columns)
        SELECT
    $select_columns
        FROM `partner`.`$table_from`
        WHERE
            `dt` >= '$start_date' and `dt` <= '$end_date'
    };

            writefile($sql_file, $sql);
            my $status = system "cat $sql_file | $ch_client";
            unlink $sql_file;

            # Stop on a failed batch: continuing would leave a gap that a re-run
            # (which resumes from MAX(dt) + 1 of the new table) would never fill.
            die "INSERT for period $start_date - $end_date failed (wait status $status)" if $status != 0;
            print logstr "Rows inserted.";

            $check_date = $end_date;
            $start_date = date_add($end_date, day => 1, iformat => 'db', oformat => 'db');
            $end_date   = date_add($start_date, day => 10, iformat => 'db', oformat => 'db');
            $end_date   = $stop_date if ($start_date le $stop_date && $stop_date le $end_date);
        } while ($stop_date ge $end_date);

        print logstr `echo "optimize table partner.$table_to on cluster $cluster" | $ch_client`;

        # Verify the copied data.
        print logstr "Checking values in tables...";

        my @field_list = ('count', @metric_columns);
        my $totals_list = join(', ', 'count()', map { sprintf('sum(%s)', $_) } @metric_columns);

        # Returns count() and the per-column sums of the given table up to $check_date, in @field_list order.
        my $fetch_totals = sub {
            my ($table) = @_;
            my $line = $query_line->(
                "select $totals_list from partner.$table where dt <= '$check_date' FORMAT TabSeparated");
            return split "\t", $line;
        };

        print logstr "Old table...";
        my @values_before = $fetch_totals->($table_from);

        print logstr "New table...";
        my @values_after = $fetch_totals->($table_to);

        print logstr "Comparing values:";
        my $all_ok = TRUE;
        # Compare the totals of both tables field by field and print the summary.
        foreach my $idx (0 .. $#field_list) {
            my $field  = $field_list[$idx];
            my $before = $values_before[$idx];
            my $after  = $values_after[$idx];

            # A missing or non-integer value means a check query failed;
            # it must never be treated as "equal" (undef == undef is true in Perl).
            my $comparable =
                 defined $before
              && defined $after
              && $before =~ /\A-?\d+\z/
              && $after =~ /\A-?\d+\z/;

            if (!$comparable) {
                my $before_str = defined $before ? $before : 'n/a';
                my $after_str  = defined $after  ? $after  : 'n/a';
                print logstr "SUM of fields $field could not be verified. Before:$before_str. After:$after_str.";
                $all_ok = FALSE;
            } elsif ($after eq $before) {
                # Both values are canonical integer strings, so string equality is exact.
                print logstr "SUM of fields $field equals";
            } else {
                print logstr "SUM of fields $field differ. Before:$before. After:$after.";
                # A different row count alone does not fail the check.
                $all_ok = FALSE unless $field eq 'count';
            }
        }

        if ($all_ok) {
            print logstr "OK! After table transform summ of all fields in old and new tables equals!";
            if ($opts->{rename_table}) {
                print logstr "Rename tables...";
                print logstr
`echo 'RENAME TABLE partner.$table_from TO partner.$BACKUP_TABLE_NAME ON CLUSTER $cluster' | $ch_client`;
                print logstr
                  `echo 'RENAME TABLE partner.$table_to TO partner.$table_from ON CLUSTER $cluster'| $ch_client`;
                $table_to   = $table_from;
                $table_from = $BACKUP_TABLE_NAME;

                print logstr `echo "optimize table partner.$table_to on cluster $cluster" | $ch_client`;
            }
        } else {
            print logstr "Attention! After table transform summ of something fields in old and new tables differ!";
        }

        rmdir $TEMP_DIR;
        print logstr "#END";
    }
   );
