#!/usr/bin/perl

use strict;
use warnings;
use utf8;
use lib::abs '../lib';
use feature 'state';
use Getopt::Long;

use DScribe::ClickhouseTable;
use lib '/home/icenine/direct-utils/yandex-lib/clickhouse/lib';
use Yandex::Clickhouse;
use Yandex::TimeCommon qw/mysql2unix unix2mysql/;

$|++;

=head1 DESCRIPTION

Script for moving data from the per-month _yyyymm tables into the big _mergetree table.

=cut

# Command-line options:
#   --table, -t   base table name (required)
#   --from, --to  bounds of the date range to walk
#   --out, -o     destination table
#   --group, -g   copy with GROUP BY (see %known)
#   --dry, -n     only print the generated SQL, do not execute it
#   --compare     compare per-day row counts instead of copying
my %opt;
GetOptions(
    \%opt,
    'table|t=s',
    # 'date|d=s',
    'from=s',
    'to=s',
    'out|o=s',
    'group|g',
    'dry|n',
    'compare',
) or die "invalid options\n";

# Per-table settings: the columns used for GROUP BY when --group is given.
my %known = (
    ppclog_api => { group_by => [ 'reqid', 'cmd', 'cid', 'bid' ] },
);

# Shared ClickHouse client used by every sub below.
my $clh = Yandex::Clickhouse->new(
    host     => 'ppchouse01e.yandex.ru',
    timeout  => 3600,
    settings => { profile => 'heavy', max_block_size => 65536 },
);

die "--table required\n" unless $opt{table};

# Compare mode reads the old data through "<table>_merge", so make sure it exists.
create_merge_table($opt{table}) if $opt{compare};

# Destination defaults to "<table>_mergetree" ("<table>_mergetest" was used for trial runs).
$opt{out} //= "$opt{table}_mergetree";

# Date range defaults.
my $date_from_ymd = defined $opt{from} ? $opt{from} : '2013-01-01';
my $date_to_ymd   = defined $opt{to}   ? $opt{to}   : '2015-05-01';
# my $date_to_ymd   = '2014-07-11';

# Main loop: walk the range from $date_from_ymd to $date_to_ymd one day at a
# time. For every day whose monthly source table exists, either compare row
# counts (--compare) or copy that day's rows into the destination table.
#
# Days of a month without a source table are skipped one by one inside the
# bounded for-loop, so the walk can never run past $date_to_ymd and always
# terminates, even when no later monthly table exists or every check fails.
#
# NOTE(review): the step is a fixed 86400 seconds; this presumably assumes
# every day in the range is exactly 24 hours long in the time zone used by
# mysql2unix/unix2mysql -- confirm.
my $ts_to         = mysql2unix($date_to_ymd);   # loop-invariant upper bound
my $skipped_month = '';                         # last month reported as skipped

for (my $ts = mysql2unix($date_from_ymd); $ts <= $ts_to; $ts += 86400) {
    my $date  = unix2ymd($ts);
    my $month = date2yyyymm($date);

    if (!check_table($month)) {
        # Report each missing month only once (check_table caches its answer,
        # so asking again for every day of the month costs nothing).
        if ($month ne $skipped_month) {
            print "skip $month\n";
            $skipped_month = $month;
        }
        next;
    }

    if ($opt{compare}) {
        compare_for_date($date);
        next;
    }

    print "move for $date\n";
    eval {
        move_for_date($date);
    };
    if ($@) {
        # A failed day is reported and the walk continues with the next day.
        print "FAILED $date: $@\n";
    }
}

#########################################

# Compare the row counts of one day between the old and the new storage.
#
# $date is a yyyymmdd string. Prints one tab-separated line:
#   <date> <count in "<table>_merge"> <count in destination table> <ok|FAIL...>
# "<table>_merge" is the table set up by create_merge_table; the destination
# is the table move_for_date writes to, i.e. --out when given and
# "<table>_mergetree" otherwise.
sub compare_for_date
{
    my $date = shift;
    my $date_sql = $date =~ s!(\d{4})(\d{2})(\d{2})!$1-$2-$3!r;
    my $table = $opt{table};
    # Compare against the table the data was actually copied into.
    my $table_new = $opt{out} // "${table}_mergetree";
    my $query_old = qq#select count() from ${table}_merge where log_date = toDate('$date_sql')#;
    my $query_new = qq#select count() from $table_new where log_date = toDate('$date_sql')#;

    # Print each column as soon as it is known so progress is visible while
    # the second query is still running.
    print "$date\t";
    my $cnt_old = $clh->get_one_field($query_old) // 0;
    print "$cnt_old\t";
    my $cnt_new = $clh->get_one_field($query_new) // 0;
    print "$cnt_new\t";
    print "".($cnt_new == $cnt_old ? 'ok' : 'FAIL---------------------------------------------------')."\n";
}

# Copy one day of rows from the monthly table "<table>_<yyyymm>" into $opt{out}.
#
# $date is a yyyymmdd string. Builds an "INSERT INTO ... SELECT ..." statement
# over the columns returned by DScribe::ClickhouseTable->db_fields, prints it,
# and executes it unless --dry was given.
#
# With --group the rows are grouped by the table's group_by columns from
# %known and every other column is wrapped in any(). Dies when --group is
# requested for a table that has no group_by columns in %known (the statement
# would otherwise end in an empty GROUP BY clause).
sub move_for_date
{
    my $date = shift;

    my $date_sql = $date =~ s!(\d{4})(\d{2})(\d{2})!$1-$2-$3!r;
    my $month    = date2yyyymm($date);

    # Look the grouping columns up without creating an entry in %known.
    my $table_info = $known{$opt{table}};
    my @group_cols = $table_info ? @{ $table_info->{group_by} // [] } : ();

    if ($opt{group} && !@group_cols) {
        die "don't know how to group table '$opt{table}'\n";
    }

    my %is_group_col = map { $_ => 1 } @group_cols;

    my $schema = DScribe::ClickhouseTable->new($opt{table});
    my $fields = $schema->db_fields;

    # Grouping columns are selected as they are; when grouping, any other
    # column needs an aggregate, and any() picks one of the grouped values.
    my @select;
    for my $f (@$fields) {
        my $aggregate = $opt{group} && !$is_group_col{$f};
        push @select, $aggregate ? "any($f) as $f" : $f;
    }

    my @SQL = (
        'INSERT INTO' => $opt{out},
        'SELECT'      => join(', ', @select),
        'FROM'        => "$opt{table}_$month",
        'WHERE'       => "log_date = toDate('$date_sql')",
    );
    if ($opt{group}) {
        push @SQL, 'GROUP BY' => join(', ', @group_cols);
    }

    my $sql = join ' ', @SQL;
    print "$sql\n";

    if (!defined $opt{dry}) {
        $clh->query($sql);
    }
}

# Tell whether the monthly table "<table>_<yyyymm>" is usable.
#
# Runs "desc" on the table; any exception from the query is taken to mean the
# table is absent. Returns 1 or 0. The answer for each month is remembered, so
# the query is sent at most once per month during a run.
sub check_table
{
    my $yyyymm = shift;

    state $seen = {};
    unless (exists $seen->{$yyyymm}) {
        eval { $clh->query("desc $opt{table}_$yyyymm") };
        $seen->{$yyyymm} = $@ ? 0 : 1;
    }
    return $seen->{$yyyymm};
}

# Create "<table>_merge", a Merge-engine table over the monthly tables.
#
# Arguments:
#   $table          - base table name
#   $template_month - optional yyyymm of the monthly table whose schema is
#                     copied; defaults to 201501
#
# Takes the "show create table" text of the template table, turns it into
# "create table if not exists <table>_merge (...) ENGINE = Merge(default, ...)",
# prints the statement and executes it. Dies when the schema cannot be read or
# does not have the expected form, instead of sending a half-rewritten
# statement to the server.
sub create_merge_table
{
    my ($table, $template_month) = @_;
    $template_month //= '201501';

    my $template     = "${table}_${template_month}";
    my $month_schema = $clh->get_one_field("show create table $template");
    die "can't get schema of $template\n"
        unless defined $month_schema && length $month_schema;

    # Example of the expected input:
    # CREATE TABLE default.ppclog_api_201501 ( log_date Date,  log_time DateTime,  cid Array(UInt64),  bid Array(UInt64),  ip String,  cmd String,  runtime Float32,  param String,  http_status UInt16,  cluid Array(UInt64),  reqid UInt64,  uid UInt64,  host String,  proc_id UInt32,  fulltime Float32,  error_detail String,  units UInt32,  units_stats String,  api_version UInt8,  interface String,  application_id String,  source String) ENGINE = MergeTree(log_date, (log_date, log_time), 8192)
    $month_schema =~ s!^CREATE TABLE !create table if not exists !
        or die "unexpected schema of $template: no leading CREATE TABLE\n";

    # \Q..\E: the table name is matched literally, not as a pattern.
    $month_schema =~ s!default\.\Q$template\E!${table}_merge!
        or die "unexpected schema of $template: table name not found\n";

    # The statement must carry the two characters \\ so that the SQL string
    # literal decodes to the regexp _\d+$ on the server side.
    my $month_re = '_\\\\d+$';
    $month_schema =~ s!ENGINE = MergeTree\(.+?, \d+\)!ENGINE = Merge(default, '$table$month_re')!
        or die "unexpected schema of $template: MergeTree engine clause not found\n";

    print $month_schema."\n";
    $clh->query($month_schema);
}

# Return the leading six digits (yyyymm) of a date string such as yyyymmdd,
# or undef when the string does not start with six digits.
sub date2yyyymm
{
    my $date = shift;
    return $date =~ /^(\d{6})/ ? $1 : undef;
}

# Convert a unix timestamp to a date string via unix2mysql.
#
# When unix2mysql's result consists only of digits and is longer than eight of
# them (presumably yyyymmddHHMMSS -- confirm against Yandex::TimeCommon), only
# the first eight digits are kept; any other result is returned unchanged.
sub unix2ymd
{
    my ($ts) = @_;
    return unix2mysql($ts) =~ s!^(\d{8})\d+$!$1!r;
}

__END__
bsexport_data bsexport_prices dbshards_ids mediaplan moderate ppclog_api ppclog_cmd

