package DScribe::Grep::Clickhouse;

use Mouse;
use utf8;

use DScribe::ClickhouseTable;
use List::MoreUtils qw/none any zip/;
use Time::Piece;
use Yandex::Clickhouse;
use Time::HiRes qw/gettimeofday tv_interval/;
use Path::Tiny;
use JSON;
use Yandex::HashUtils qw/hash_merge/;
use open ':std' => ':utf8';

extends 'DScribe::Grep';

# Log type name; BUILDARGS passes it to DScribe::ClickhouseTable->new.
has type => ( is => 'ro', isa => 'Str', required => 1 );
# Table description object (built in BUILDARGS from the type).
has table => (is => 'ro', isa => 'Object', required => 1);
# When true, the generated query is printed to STDERR before it is run.
has verbose => (is => 'ro', isa => 'Bool');
# NOTE(review): not read anywhere in this file -- presumably used by the caller or the base class; confirm.
has dry_run => (is => 'ro', isa => 'Bool');
# Row limit; limit_sql renders it as "LIMIT n".
has limit => (is => 'ro', isa => 'Int', default => 100_000, required => 1);
# When true and a result file is set, grep_logs opens that file with less.
has less => (is => 'ro', isa => 'Bool');
# Path of the result file; when set, grep_logs directs the query output there.
has file => (is => 'ro', isa => 'Str');
# Yandex::Clickhouse client used to run the queries (built in BUILDARGS).
has clickhouse => (is => 'ro', isa => 'Object', required => 1);
# NOTE(review): not read anywhere in this file; confirm where it is consumed.
has debug => (is => 'ro', isa => 'Bool', default => 0);
# Number of lines written by postprocess_file.
has line_count => (is => 'rw', isa => 'Int', default => 0);
# Rows of the last query: array ref of row hashes as returned by ->tsv(names => 1).
has result => (is => 'rw', isa => 'ArrayRef');
# Duration of the last query in seconds, measured with tv_interval.
has runtime => (is => 'rw', isa => 'Num');
# Text of the last query that was built.
has query => (is => 'rw', isa => 'Str');
# NOTE(review): not read or written anywhere in this file; confirm whether it is still needed.
has host => (is => 'rw', isa => 'Str');
# Extra SQL conditions that where() ANDs into the WHERE clause; filled by add_sql_condition.
has additional_sql => (is => 'rw', 'isa' => 'ArrayRef[Str]', default => sub {[]});


# Converts the positional constructor call ->new($type, %opt) into the
# attribute hash used to build the object.
#
# Recognised %opt keys: dbhost, profile, clickhouse_user (ClickHouse client
# settings) and verbose, dry_run, limit, less, file, debug (copied to the
# attributes of the same name when defined).
sub BUILDARGS
{
    my ($class, $type, %opt) = @_;

    my $table = DScribe::ClickhouseTable->new($type);
    # # NO_PRODUCTION
    # $Yandex::Log::LOG_ROOT = 'var/log';
    # $Yandex::Clickhouse::QUERIES_LOG = 'clh.log';
    #$opt{profile} //= 'heavy';

    # The profile setting is sent only when the caller asked for one.
    my %settings = (max_block_size => 1024);
    $settings{profile} = $opt{profile} if $opt{profile};

    my $clickhouse = Yandex::Clickhouse->new(
        host => $opt{dbhost} // 'ppchouse-cloud.direct.yandex.net', # NO_PRODUCTION
        port => 8443,
        use_https => 1,
        timeout => 8*3600,
        settings => \%settings,
        user => $opt{clickhouse_user} // 'readonly',
        # host => $opt{clickhouse_host},
        # port => $opt{clickhouse_port},
    );

    my %args = (
        type => $type,
        table => $table,
        clickhouse => $clickhouse,
    );
    # Pass through only the options that were actually supplied, so that
    # attribute defaults still apply to the rest.
    for my $name (qw/verbose dry_run limit less file debug/) {
        next unless defined $opt{$name};
        $args{$name} = $opt{$name};
    }
    return \%args;
}

=head2 options

Функция возвращает набор опций, пригодный для Getopt::Long

=cut

# Returns an array ref of Getopt::Long option specifications, built by
# passing every entry of the table's table_fields through _field2getopt.
sub options
{
    my ($self) = @_;
    my @specs;
    for my $field (@{ $self->table->table_fields }) {
        push @specs, $self->_field2getopt($field);
    }
    return \@specs;
}

=head2 grep_options

Опции, специфичные для данного grep (хранилища)

=cut

# Getopt::Long specifications for the options specific to this storage:
# the database host and the settings profile.
sub grep_options
{
    return qw(
        dbhost|H=s
        profile|P=s
    );
}

# Shortcut for the table_name of the table description object.
sub table_name
{
    my ($self) = @_;
    my $table = $self->table;
    return $table->table_name();
}

# Runs an aggregate query: count() grouped and ordered by the requested columns.
#
# Options read from $opt:
#   stat  - comma-separated list of columns to group by (default: log_date)
#   cb    - when set, the query is sent with query_async; once result and
#           runtime are stored, cb is invoked without arguments
#   print - synchronous mode only: print the rows to STDOUT (defaults to 1,
#           the default is written back into $opt)
# Date and field filters in $opt are converted to SQL by where() and prewhere().
sub logs_stat
{
    my ($self, $opt) = @_;
    my $fields = $opt->{stat} || 'log_date';
    my @fields = split /\s*,\s*/, $fields;
    my $order = join ",", @fields;
    # Normalise and validate the date options up front; the returned pair is
    # not needed here because where() obtains it again.
    $self->_get_dates_from_options($opt);
    # local $Yandex::Log::LOG_ROOT = lib::abs::path('../..').'/var/log/';
    # local $Yandex::Clickhouse::QUERIES_LOG = 'clh.log';
    my $where = $self->where($opt);
    my $prewhere = $self->prewhere($opt);
    my $format = 'TabSeparatedWithNames';
    $self->clickhouse->query_format($format);
    my $t0 = [gettimeofday];
    my $query = [
        select => "count() as cnt, $fields",
        from => $self->table_name,
        ($prewhere ? (prewhere => $prewhere) : ()),
        ($where ? (where => $where) : ()),
        'group by' => $fields,
        'order by' => $order,
    ];
    $self->query($self->clickhouse->format($query));
    # Echo the query before dispatching it, so that --verbose works in the
    # asynchronous mode as well (same order as in grep_logs).
    if ($self->verbose) {
        print STDERR $self->query."\n";
    }
    if ($opt->{cb}) {
        return $self->clickhouse->query_async($query, sub {
            my $elapsed = tv_interval ( $t0 );
            $self->result(shift->tsv(names => 1));
            $self->runtime($elapsed);
            $opt->{cb}->();
        });
    }
    my $res = $self->clickhouse->query($query);
    my $elapsed = tv_interval ( $t0 );
    $self->runtime($elapsed);
    $opt->{print} //= 1;
    $self->result($res->tsv(names => 1));

    if ($opt->{print}) {
        for my $r (@{$self->result}) {
            my $cnt = $r->{cnt};
            printf "% 12d\t", $cnt;
            print join "\t", @{$r}{@fields};
            print "\n";
        }
    }
}

=head2 grep_logs

=cut

# Builds the SELECT for the given options, runs it and post-processes the
# result.
#
# Modes:
#   * $opt->{cb} set  - the query is sent with query_async; when it completes
#                       the rows are stored in result, post-processed, and cb
#                       is invoked without arguments;
#   * file not set    - synchronous; rows are stored in result and post-processed;
#   * file set        - synchronous; the output goes to that file, which is then
#                       post-processed in place; a summary is printed and the
#                       file is opened in less when the less attribute is true.
sub grep_logs
{
    my ($self, $opt) = @_;

    $opt = $self->preprocess($opt);

    my $query = $self->build_query($opt);
    $self->query($query);
    if ($self->verbose) {
        print STDERR "$query\n";
    }
    if ($self->file) {
        $self->clickhouse->output_file($self->file);
    }
    my $format = $self->file ? 'TabSeparated' : 'TabSeparatedWithNames';
    $self->clickhouse->query_format($format);
    my $t0 = [gettimeofday];
    if ($opt->{cb}) {
        # when serving a request from Mojolicious we do not want to create
        # one more condvar and block on it
        return $self->clickhouse->query_async($query, sub {
            my $elapsed = tv_interval ( $t0 );
            my $res = shift;
            # NOTE(review): decoded twice -- presumably the data may arrive
            # double-encoded; confirm before removing the second call.
            utf8::decode($res->{data});
            utf8::decode($res->{data});
            $self->result($res->tsv(names => 1));
            $self->postprocess();
            $self->runtime($elapsed);
            $opt->{cb}->();
        });
    }
    my $res = $self->clickhouse->query($query);
    my $elapsed = tv_interval ( $t0 );
    $self->runtime($elapsed);
    if (!$self->file) {
        $self->result($res->tsv(names => 1));
        $self->postprocess();
        return;
    }
    $self->postprocess();
    my $file = $self->file;
    # The file name is passed as an argument, not interpolated into the
    # format: a '%' in the path must not be treated as a conversion.
    printf "%d lines in %s\nresult file: %s\n", $self->line_count, _format_time($elapsed), $file;
    if ($self->less) {
        # List form bypasses the shell, so quotes or spaces in the path are safe.
        system 'less', '--', $file;
    }
}

# Assembles the text of the SELECT query: the clause keywords and their
# bodies are joined with newlines. PREWHERE and WHERE are emitted only when
# the corresponding condition string is non-empty.
sub build_query
{
    my ($self, $opt) = @_;
    my $where_sql = $self->where($opt);
    my $prewhere_sql = $self->prewhere($opt);

    my @lines = (
        # 'SELECT' => '*',
        'SELECT', join(',', $self->select_fields),
        'FROM', $self->table_name,
    );
    push @lines, 'prewhere', $prewhere_sql if $prewhere_sql;
    push @lines, 'where', $where_sql if $where_sql;
    # ( $self->can('group_by') ? $self->group_by : () ),
    # $self->order_sql,
    push @lines, $self->limit_sql;

    return join "\n", @lines;
}

# Lists every option name that can carry a filter: all alternatives of all
# db_fields of the table, in field order.
sub _where_options
{
    my ($self) = @_;
    my @names;
    push @names, $self->_name_alternatives($_) for @{ $self->table->db_fields };
    return @names;
}

# Returns the option names under which a filter for field $name may arrive:
# the name itself, plus "<name>_like"/"<name>-like" when the field has the
# like property and "<name>_eq"/"<name>-eq" when it has force_like.
sub _name_alternatives
{
    my ($self, $name) = @_;
    my $desc = $self->table->fields_hash->{$name};

    my @alternatives = ($name);
    for my $pair ([like => 'like'], [force_like => 'eq']) {
        my ($prop, $suffix) = @$pair;
        next unless $desc->{$prop};
        push @alternatives, "$desc->{name}_$suffix", "$desc->{name}-$suffix";
    }
    return @alternatives;
}

# Builds the PREWHERE condition string.
#
# Dies when no filter option, no stat and no raw where were given, unless the
# limit is at most 100 (then nothing is returned). Otherwise every non-json
# db field that has a filter option set and is not listed in
# $opt->{skip_fields} contributes one condition; $opt->{where} is appended
# verbatim. The conditions are joined with AND; '' is returned when there
# are none.
sub prewhere
{
    my ($self, $opt) = @_;

    my $no_filters = (none { defined $opt->{$_} } $self->_where_options)
        && !defined $opt->{stat}
        && !defined $opt->{where};
    if ($no_filters) {
        return () if $self->limit <= 100;
        die "None of required parameters provided\nSee --type ".$self->type." --help";
    }

    my @conditions;
    for my $field (@{ $self->table->db_fields }) {
        next unless any { defined $opt->{$_} } $self->_name_alternatives($field);
        next if $opt->{skip_fields} && exists $opt->{skip_fields}->{$field};
        # heavy conditions with like are sent to WHERE
        next if $self->table->field_get_prop($field => 'json');
        push @conditions, $self->param2query($field => $opt);
    }
    push @conditions, $opt->{where} if $opt->{where};

    return @conditions ? join(' AND ', @conditions) : '';
}

# Converts the filter option(s) of one field into an SQL condition.
#
# Arguments: $field - column name; $opt - hash ref with the options.
# The shape of the condition depends on the field properties of the table:
#   force_like  - "<field>_eq" gives an equality test, otherwise the plain
#                 value is searched as a substring (LIKE '%value%');
#   like        - "<field>_like" gives a substring search, otherwise the plain
#                 value is a comma-separated list for IN (...);
#   Array(T)    - has(field, toT('value'));
#   non-String  - field = toType('value');
#   String      - the plain value is a comma-separated list for IN (...).
# Dies with "unexpected value for <field>" when a like field has neither a
# non-empty "<field>_like" nor a non-empty plain value.
#
# Every value is escaped before it is put between single quotes, so a quote
# or a backslash in the value cannot terminate the literal. LIKE wildcards
# (% and _) are deliberately left untouched.
sub param2query
{
    my ($self, $field, $opt) = @_;

    # Backslash-escape the two characters that are special inside a
    # single-quoted SQL string literal.
    my $escape = sub {
        my ($text) = @_;
        $text =~ s/([\\'])/\\$1/g;
        return $text;
    };
    my $quote = sub { return "'" . $escape->($_[0]) . "'" };

    my $field_value = $opt->{$field} // '';
    utf8::decode($field_value);
    my @in = split /\s*,\s*/, $field_value;

    # TODO two nearly identical blocks, think about how to simplify
    if ($self->table->field_get_prop($field => 'force_like')) {
        my $field_eq = "${field}_eq";
        if (defined $opt->{$field_eq}) {
            return sprintf "$field = '%s'", $escape->($opt->{$field_eq});
        }
        # $field_value is always defined here (it defaults to ''), so the
        # substring search is the only other outcome.
        return "$field LIKE '%" . $escape->($field_value) . "%'";
    }

    if ($self->table->field_get_prop($field => 'like')) {
        my $like_value = $opt->{ $field."_like" };
        # Test for a non-empty string, not for truth: '0' is a valid value.
        if (defined $like_value && length $like_value) {
            return sprintf "$field LIKE '%%%s%%'", $escape->($like_value);
        }
        elsif (length $field_value) {
            return sprintf "$field in (%s)", join ',', map { $quote->($_) } @in;
        }
        else {
            die "unexpected value for $field";
        }
    }

    my $type = $self->table->field_get_prop($field => 'type');
    if ($type =~ /Array\((.+)\)/) {
        my $element_type = $1;
        return sprintf "has($field, to$element_type('%s'))", $escape->($field_value);
    }
    if ($type ne 'String') {
        return sprintf "$field = to$type('%s')", $escape->($field_value);
    }
    return sprintf "$field in (%s)", join ',', map { $quote->($_) } @in;
}

# Builds the WHERE condition string.
#
# It always contains the log_date range taken from the date options. It also
# contains one condition per json field that has any of its filter options
# set (plain name or a _like/_eq alternative) and is not listed in
# $opt->{skip_fields}, followed by every entry of additional_sql.
# Everything is joined with AND.
sub where
{
    my ($self, $opt) = @_;
    my ($from_dt, $to_dt) = $self->_get_dates_from_options($opt);

    my @where = (
        sprintf("log_date >= toDate('%s')", $from_dt),
        sprintf("log_date <= toDate('%s')", $to_dt),
    );

    for my $field (@{ $self->table->db_fields }) {
        # Look at every alternative spelling, exactly as prewhere() does.
        # prewhere() leaves json fields to this method, so checking only the
        # plain name would silently drop a filter given as <field>_like or
        # <field>_eq.
        next unless any { defined $opt->{$_} } $self->_name_alternatives($field);
        next if $opt->{skip_fields} && exists $opt->{skip_fields}->{$field};
        next unless $self->table->field_get_prop($field => 'json');
        push @where, $self->param2query($field => $opt);
    }

    push @where, @{$self->additional_sql};

    return join ' AND ', @where;
}

# Names of the table fields to select: every table_fields entry that is not
# marked with hide, in declaration order.
sub select_fields
{
    my ($self) = @_;
    my @names;
    for my $field (@{ $self->table->table_fields }) {
        next if $field->{hide};
        push @names, $field->{name};
    }
    return @names;
}

# Renders the LIMIT clause from the limit attribute.
sub limit_sql
{
    my ($self) = @_;
    my $limit = $self->limit;
    return "LIMIT $limit";
}

# Normalises the date options in $opt (in place) to a date_from/date_to pair
# and returns it as the list ($date_from, $date_to), both as YYYY-MM-DD.
#
# Exactly one of today, week, month, date_from, date, last_days must be
# defined; date_to is allowed only together with date_from and defaults to
# the current date. The shortcut options are deleted from $opt after being
# converted, so the sub can safely be called again on the same hash.
# Dates may be given as YYYYMMDD or YYYY-MM-DD. Dies on any violation.
sub _get_dates_from_options
{
    my ($self, $opt) = @_;

    if (defined $opt->{date_to} && !defined $opt->{date_from}) {
        die "error: date-to without date-from, stop";
    }
    my $t = localtime;
    if (!defined $opt->{date_to} && defined $opt->{date_from}) {
        $opt->{date_to} = $t->ymd;
    }


    my $cond_count = scalar grep { defined $opt->{$_} } qw/today week month date_from date last_days/;
    unless ($cond_count == 1) {
        die "error: too many or too few date conditions, stop ($cond_count)";
    }

    if (defined $opt->{today}) {
        $opt->{date_to} = $opt->{date_from} = $t->ymd;
        delete $opt->{today};
    }
    if (defined $opt->{week}) {
        $opt->{date_to} = $t->ymd;
        my $f = $t - 86400*7;
        $opt->{date_from} = $f->ymd;
        delete $opt->{week};
    }
    if (defined $opt->{month}) {
        $opt->{date_to} = $t->ymd;
        $opt->{date_from} = $t->add_months(-1)->ymd;
        delete $opt->{month};
    }
    if (defined $opt->{date}) {
        $opt->{date_to} = $opt->{date_from} = $opt->{date};
        delete $opt->{date};
    }
    if (defined $opt->{last_days}) {
        die "too many or too few last-days, stop" unless $opt->{last_days} > 0;
        $opt->{date_to} = $t->ymd;
        my $f = $t - 86400*$opt->{last_days};
        $opt->{date_from} = $f->ymd;
        delete $opt->{last_days};
    }

    for my $key (qw/date_from date_to/) {
        # Compact form YYYYMMDD is rewritten to YYYY-MM-DD.
        if ($opt->{$key} =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})\z/) {
            $opt->{$key} = "$1-$2-$3";
        }
        # \A...\z instead of ^...$: '$' would accept a trailing newline, and
        # the value goes straight into an SQL literal.
        die "bad $key: '$opt->{$key}', stop" unless $opt->{$key} =~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/;
    }

    return ($opt->{date_from}, $opt->{date_to});
}

# Prints the list of options valid for this log type, one "--spec" per line.
sub usage
{
    my ($self) = @_;
    my @flags = map { "--$_" } @{ $self->options() };
    print "Valid options for this type:\n\t" . join("\n\t", @flags) . "\n";
}

# Dispatches post-processing: rows held in result are processed in memory,
# otherwise the result file is processed.
sub postprocess
{
    my ($self) = @_;
    return $self->postprocess_result() if $self->result;
    return $self->postprocess_file();
}

# Runs postprocess_line over every row held in result and merges the outcome
# back into the row.
sub postprocess_result
{
    my ($self) = @_;
    foreach my $record (@{ $self->result }) {
        hash_merge($record, $self->postprocess_line($record));
    }
}

# Parses one tab-separated line of the result file into a hash ref keyed by
# the names returned by select_fields (columns are matched by position).
#
# The trailing newline is removed before splitting, so it no longer ends up
# inside the last column. Empty trailing columns are kept as empty strings,
# missing columns are undef, and columns beyond the known fields are ignored.
sub postprocess_parse_line
{
    my ($self, $line) = @_;
    my @fields = $self->select_fields;
    utf8::decode($line);
    chomp $line;
    # Limit -1 keeps trailing empty columns, which split drops by default.
    my @values = split /\t/, $line, -1;
    my %row;
    @row{@fields} = @values;
    return \%row;
}

# Rewrites the result file in place: every line is parsed and post-processed,
# rows without log_time are dropped, and each remaining row is written as
# "<log_time>\t<row as JSON>". The number of written rows is stored in
# line_count, then the lines are sorted into the result file.
# Dies when a file cannot be opened, the write cannot be completed, or sort
# fails.
sub postprocess_file
{
    my $self = shift;
    my $file = $self->file;
    my $tmp = "$file.tmp";
    open my $IN, '<', $file or die "$file: $!";
    open my $OUT, '>', $tmp or die "$tmp: $!";
    my $cnt = 0;
    while (my $line = <$IN>) {
        my $row = $self->postprocess_parse_line($line);
        $row = $self->postprocess_line($row);
        next unless $row->{log_time};
        print {$OUT} $row->{log_time}."\t".to_json($row)."\n";
        $cnt++;
    }
    close $IN;
    # Buffered write errors surface only at close, so check it.
    close $OUT or die "$tmp: $!";
    $self->line_count($cnt);

    # sort the result lexicographically, hoping that every line starts with
    # the date and time; sort reads the temporary file and writes straight
    # into the result file. List form of system bypasses the shell, so spaces
    # or metacharacters in the path are safe.
    system('sort', '-o', $file, '--', $tmp) == 0 or die "can't sort file $tmp";
    unlink $tmp;
}

=head2 postprocess_line


=cut

# Decodes the JSON-carrying columns of one row in place and returns the row.
#
# Rows without log_time are returned untouched. For every db field that has
# the json property or an Array type, the value is replaced with the decoded
# structure when it parses as JSON; when it does not, the value is retried
# after replacing \" with ". If the result is a hash, its string values that
# start with [ or { get the same treatment one level down.
sub postprocess_line
{
    my ($self, $row) = @_;
    return $row unless $row->{log_time};

    my @structured = grep {
        $self->table->field_get_prop($_ => 'json')
            || $self->table->field_get_prop($_ => 'type') =~ /Array/
    } @{ $self->table->db_fields };

    for my $name (@structured) {
        $row->{$name} = eval { from_json($row->{$name}) } // $row->{$name};
        if ($@) {
            # repair badly encoded json
            # TODO find out why this happens
            $row->{$name} =~ s!\\"!\"!g;
            $row->{$name} = eval { from_json($row->{$name}) } // $row->{$name};
            # die $@ if $@;
        }
        next unless ref($row->{$name}) eq 'HASH';

        # try to decode nested json
        my $nested = $row->{$name};
        for my $key (keys %$nested) {
            my $value = $nested->{$key};
            next unless $value && $value =~ /^[\[\{]/;
            $nested->{$key} = eval { from_json($value) } // $nested->{$key};
            # print STDERR $@ if $@;
            if ($@) {
                # Only the local copy is repaired; if it still fails to
                # parse, the stored value stays as it was.
                $value =~ s!\\"!\"!g;
                $nested->{$key} = eval { from_json($value) } // $nested->{$key};
            }
        }
    }
    return $row;
}

# Appends a raw SQL condition to additional_sql; where() ANDs these into the
# WHERE clause.
sub add_sql_condition
{
    my ($self, $sql) = @_;
    my $conditions = $self->additional_sql;
    push @$conditions, $sql;
}

=head2 _field2getopt

    Format options for Getopt::Long

    In: { name => 'column_name', type => 'clickhouse type' }
    Out: "column_name=[s|i]"

=cut

# Builds the Getopt::Long specification(s) for one field description.
# The base option takes an integer when the type matches /int/i and a string
# otherwise; like and force_like fields additionally get a string option
# "<name>_like|<name>-like" and "<name>_eq|<name>-eq" respectively.
sub _field2getopt
{
    my ($self, $field) = @_;
    my $name = $field->{name};
    my $value_kind = $field->{type} =~ /int/i ? 'i' : 's';

    my @specs = ("$name=$value_kind");
    push @specs, "${name}_like|${name}-like=s" if $field->{like};
    push @specs, "${name}_eq|${name}-eq=s" if $field->{force_like};
    return @specs;
}

# Formats a duration given in seconds: "S.SS sec" below a minute,
# "M min S sec" below an hour, "H hr, M min S sec" otherwise.
sub _format_time
{
    my ($seconds) = @_;

    return sprintf "%.2f sec", $seconds
        if $seconds < 60;

    return sprintf "%d min %d sec", $seconds/60, $seconds % 60
        if $seconds < 3600;

    return sprintf "%d hr, %d min %d sec", $seconds/3600, ($seconds%3600)/60, $seconds%60;
}

1;
