#!/usr/bin/perl

use lib::abs qw(../lib);

use Pod::Usage;
use Getopt::Long qw();

use qbit;

use Application;

use PiSecrets;

=encoding UTF-8

=head1 DESCRIPTION

Скрипт сравнивает статистику из mysql и clickhouse. (Используются подключения беты)

=head1 USAGE

  perl /bin/oneshots/compare_mysql_vs_clickhouse.pl
  perl /bin/oneshots/compare_mysql_vs_clickhouse.pl --levels=statistics_advnet_context_on_site_adblock,statistics_advnet_context_on_site_mcb
  perl /bin/oneshots/compare_mysql_vs_clickhouse.pl --date=2018-01-01

=head1 OPTIONS

  levels - Уровни для которых нужно сравнить статистику (дефолт: все)
  date   - Дата за которую делается сверка (дефолт: текущая)
  prod   - Для запуска на проде (от этого зависит только порт к ClickHouse)
  ssl    - Включает проверку сертификата и меняет порт

=cut

# ~/beta> ./bin/oneshots/compare_mysql_vs_clickhouse.pl
#
# 2018-01-23
#     mobile_app_rtb
#         StatCompare/MySQL/mobile_app_rtb_mysql.tsv
#         StatCompare/CH/mobile_app_rtb_ch.tsv
#     internal_context_on_site_rtb
#         StatCompare/MySQL/internal_context_on_site_rtb_mysql.tsv
#         StatCompare/CH/internal_context_on_site_rtb_ch.tsv
#     context_on_site_rtb
#         StatCompare/MySQL/context_on_site_rtb_mysql.tsv
#         StatCompare/CH/context_on_site_rtb_ch.tsv
#
# StatCompare/
# ├── [4.0K]  CH
# │   ├── [178M]  context_on_site_rtb_ch.tsv
# │   ├── [4.1M]  internal_context_on_site_rtb_ch.tsv
# │   └── [8.6M]  mobile_app_rtb_ch.tsv
# └── [4.0K]  MySQL
#     ├── [178M]  context_on_site_rtb_mysql.tsv
#     ├── [4.1M]  internal_context_on_site_rtb_mysql.tsv
#     └── [8.6M]  mobile_app_rtb_mysql.tsv
#
# 2 directories 6 files
# END

# Working directories, relative to the current directory:
#   $SQL_DIR - generated query texts, $RES_DIR - TSV dumps produced by the clients.
my ($SQL_DIR, $RES_DIR) = ('SqlDir', 'StatCompare');

# Key columns on the ClickHouse side.
# NOTE(review): not referenced anywhere in the visible code - confirm before removing.
my @CH_PK = ('dt', 'product_id', 'page_id', 'block_id', 'dsp_id', 'currency_id', 'tag_id');

main();

# Entry point.
#
# For every eligible statistics level: builds an aggregated per-primary-key
# query for the MySQL stat table and the matching query for the ClickHouse
# statistics table, saves both query texts under $SQL_DIR, runs them through
# the command-line clients into TSV files under $RES_DIR and prints the diff
# of the two dumps.
sub main {

    my ($date, $levels_filter, $is_on_prod, $ssl) = _get_args();

    # List-form system() bypasses the shell; stop right away if the working
    # directories cannot be created instead of failing later on every file.
    system('mkdir', '-p', "$SQL_DIR/MySQL", "$SQL_DIR/CH", "$RES_DIR/MySQL", "$RES_DIR/CH") == 0
      or die "Cannot create working directories under $SQL_DIR/ and $RES_DIR/\n";

    my $app = Application->new();

    $app->pre_run();

    my $mysql_conf = $app->get_option('partner_db');
    my $ch_conf    = $app->get_option('clickhouse_db');

    # Every interpolated value is shell-quoted: a password or host containing
    # shell metacharacters must not break (or alter) the command line.
    my $mysql_connect = sprintf(
        "MYSQL_PWD=%s mysql -u %s --port=%s  --host=%s %s --default-character-set=utf8",
        (map {_shell_quote($_)} @$mysql_conf{qw(password  user  port  host  database)})
    );

    # --ssl takes precedence over --prod when choosing the ClickHouse port.
    my $ch_port    = $ssl ? 9440 : ($is_on_prod ? 19000 : 9000);
    my $password   = get_secret('partner2-clickhouse-password');
    my $ch_connect = sprintf(
        "clickhouse-client %s --port=%s --host=%s --user=%s -d %s --password=%s -m --format=TabSeparatedWithNames",
        ($ssl ? '--ssl' : ''),
        $ch_port, (map {_shell_quote($_)} @$ch_conf{qw(host user database)}, $password)
    );

    my $levels = $app->product_manager->get_statistics_accessors();

    # Header line: the date being compared (per-level lines are indented under it).
    print "$date\n";

    foreach my $level (sort @$levels) {
        my $level_obj = $app->$level;

        # NOTE(review): levels for which support_clickhouse is true are skipped;
        # confirm this is the intended polarity of the flag for this comparison.
        next
          if $level_obj->accessor eq 'statistics_dsp'
              || $level_obj->support_clickhouse
              || ($level_obj->isa('Application::Model::Statistics::Hierarchy') && !$level_obj->_can('query'))
              || (%$levels_filter && !$levels_filter->{$level});

        my $table = $level_obj->_get_stat_table();

        my $table_name = $table->name;

        print "    $table_name\n";

        my $mysql_path = "$SQL_DIR/MySQL/${table_name}.sql";
        my $ch_path    = "$SQL_DIR/CH/${table_name}.ch";

        my $mysql_fname = "$RES_DIR/MySQL/${table_name}_mysql.tsv";
        my $ch_fname    = "$RES_DIR/CH/${table_name}_ch.tsv";

        my %mysql_pk = map {$_ => TRUE} @{$table->primary_key()};

        my @mysql_all_fields = map {$_->{'name'}} @{$table->fields()};

        my @mysql_pk = sort keys(%mysql_pk);

        my ($map_fields, $query_fields) = _get_map_fields($level_obj, \@mysql_all_fields);

        # ClickHouse columns corresponding to the MySQL primary key columns.
        my @ch_pk = map {$map_fields->{$_}} @mysql_pk;

        # Non-key MySQL columns that have a ClickHouse counterpart.
        my @fields = sort grep {!$mysql_pk{$_} && defined($map_fields->{$_})} @mysql_all_fields;

        my $mysql_query = $app->partner_db->query->select(
            table  => $table,
            fields => {(map {$_ => ''} @mysql_pk), map {$_ => {SUM => [$_]}} @fields},
            filter => {dt => $date},
        );

        # Same column order on both sides so the dumps are directly diffable.
        $mysql_query->fields_order(@mysql_pk, @fields);

        $mysql_query->group_by(@mysql_pk);
        $mysql_query->order_by(@mysql_pk);

        my ($sql) = $mysql_query->get_sql_with_data();

        writefile($mysql_path, $sql);

        my $ch_query = $app->clickhouse_db->query->select(
            table  => $app->clickhouse_db->statistics,
            fields => {(map {$_ => $map_fields->{$_}} @mysql_pk), map {$_ => $query_fields->{$_}} @fields},
            filter => {dt => $date, product_id => $level_obj->product->accessor},
        );

        $ch_query->fields_order(@mysql_pk, @fields);

        $ch_query->group_by(@ch_pk);
        $ch_query->order_by(@ch_pk);
        $ch_query->format('TabSeparatedWithNames');

        ($sql) = $ch_query->get_sql_with_data();

        writefile($ch_path, $sql);

        my $mysql_ok = _run_dump("MySQL dump of $table_name",
            sprintf('%s < %s > %s', $mysql_connect, _shell_quote($mysql_path), _shell_quote($mysql_fname)));
        print "        $mysql_fname\n";

        my $ch_ok = _run_dump("ClickHouse dump of $table_name",
            sprintf('%s < %s > %s', $ch_connect, _shell_quote($ch_path), _shell_quote($ch_fname)));
        print "        $ch_fname\n";

        # Diffing an empty or partial dump only produces noise, so the diff is
        # skipped when either client failed (a warning has been printed already).
        next unless $mysql_ok && $ch_ok;

        # diff exits with status 1 when the files differ, so its status is not checked.
        print `diff $mysql_fname  $ch_fname`;
    }

    print `tree -h "$RES_DIR/"`;

    print "END\n";

    $app->post_run();
}

# Quotes a value for safe interpolation into a POSIX shell command line.
# An undefined or empty value yields an empty string (no argument at all),
# which keeps the command line identical to plain interpolation in that case.
sub _shell_quote {
    my ($value) = @_;

    return '' unless defined($value) && length($value);

    # Inside single quotes only the single quote itself needs special handling.
    $value =~ s/'/'\\''/g;

    return "'$value'";
}

# Runs one dump command line through the shell.
# Returns 1 when the command exits with status 0. Otherwise prints a warning
# (without echoing the command line, which contains credentials) and returns 0.
sub _run_dump {
    my ($what, $cmd_line) = @_;

    return 1 if system($cmd_line) == 0;

    if ($? == -1) {
        warn "$what: failed to start the command: $!\n";
    } else {
        warn sprintf("%s: command failed (wait status %d)\n", $what, $?);
    }

    return 0;
}

#TODO: replace this sub with building the query via get_statistics
#
# Works out, for every MySQL field name, which ClickHouse column it maps to
# and which aggregate expression selects the same numbers from ClickHouse.
#
# Arguments:
#   $level  - level object providing convert_mysql_to_clickhouse()
#   $fields - array ref with all MySQL field names of the stat table
#
# Returns a list of two hash refs, both keyed by MySQL field name:
#   1) ClickHouse column name (undef when the probe row yields no non-zero column)
#   2) aggregate expression for the ClickHouse query builder
sub _get_map_fields {
    my ($level, $fields) = @_;

    my %zero_row = map {$_ => 0} @$fields;

    my %ch_name_of    = ();
    my %ch_expr_of    = ();
    my %has_dsp_split = ();

    # Reverse order: the "*_own_adv" / "*_unsold" variants must be seen before
    # their base field, because the base field's expression depends on them.
    foreach my $name (reverse sort @$fields) {
        # Probe the converter with a row where only the current field is set.
        my $ch_row = $level->convert_mysql_to_clickhouse([{%zero_row, $name => 1}])->[0];

        $ch_row->{'currency_id'} = 0 unless $name eq 'currency_id';

        # The column that came out non-zero is the counterpart of the field.
        my ($ch_name) = grep {$ch_row->{$_} && $_ ne 'product_id'} keys(%$ch_row);

        $ch_name_of{$name} = $ch_name;

        # Fields with these suffixes are selected by a fixed dsp_id.
        my $own_dsp_id =
            $name =~ /_own_adv\z/ ? \5
          : $name =~ /_unsold\z/  ? \10
          :                         undef;

        if ($own_dsp_id) {
            $has_dsp_split{$name} = TRUE;
            $ch_expr_of{$name} = {sumIf => [$ch_name, ['dsp_id', '=', $own_dsp_id]]};
            next;
        }

        # Base field: leave out the dsp_ids already covered by its variants.
        my @excluded_dsp =
          map  {['dsp_id', '<>', $_->[1]]}
          grep {$has_dsp_split{$name . $_->[0]}} (['_own_adv', \5], ['_unsold', \10]);

        if (@excluded_dsp) {
            $ch_expr_of{$name} = {sumIf => [$ch_name, ['AND', \@excluded_dsp]]};
        } else {
            $ch_expr_of{$name} = {SUM => [$ch_name]};
        }
    }

    return (\%ch_name_of, \%ch_expr_of);
}

# Parses the command-line options (all of them are optional).
#
# Returns a list:
#   $date           - value of --date; the result of curdate(oformat => 'db')
#                     when the option is omitted or given without a value
#   \%levels_filter - hash ref whose keys are the names listed in --levels
#                     (comma separated); an empty hash means "no filter"
#   $is_on_prod     - true when --prod was given
#   $ssl            - true when --ssl was given
#
# Prints usage and exits on unknown options or on --help.
sub _get_args {

    my $levels     = '';
    my $date       = undef;
    my $help       = 0;
    my $is_on_prod = 0;
    my $ssl        = 0;

    Getopt::Long::GetOptions(
        #--- Optional
        'levels:s' => \$levels,
        'date:s'   => \$date,
        'prod!'    => \$is_on_prod,
        'ssl!'     => \$ssl,
        #---
        'help|?|h' => \$help,
    ) or pod2usage(1);

    pod2usage(-verbose => 2, -noperldoc => 1) if $help;

    # 'date:s' makes the value optional, so a bare "--date" or "--date=" yields
    # an empty string. Filtering on an empty date would make both dumps empty
    # and the comparison falsely "clean", so fall back to the default instead.
    $date = curdate(oformat => 'db') unless defined($date) && length($date);

    my %levels_filter = map {$_ => TRUE} split(/,/, $levels);

    return ($date, \%levels_filter, $is_on_prod, $ssl);
}
