package Application::Model::Product::SSP::ImpressionLog;

use qbit;

use base qw(QBit::Application::Model QBit::Application::Model::API::HTTP);

use String::Escape;

use Utils::Logger qw/INFO INFOF WARN WARNF/;

use Utils::MonitoringUtils qw/send_to_graphite/;

use Exception::SSP::BadData;

use PiConstants qw/$OS_ID_ANDROID $OS_ID_IOS $OS_ID_WIN $OS_ID_OTHER/;

# Name under which this model is registered in the application.
sub accessor {
    return 'ssp_imps';
}

# Other models and APIs this model reaches through $self-><name>.
__PACKAGE__->model_accessors(
    partner_db      => 'Application::Model::PartnerDB::SSP',
    ssp_application => 'Application::Model::Product::SSP::Application',
    ssp_site        => 'Application::Model::Product::SSP::Site',
    ssp_seller      => 'Application::Model::Product::SSP::Seller',
    api_yql         => 'QBit::Application::Model::API::Yandex::YQL',
);

# update_data() rejects a day's log unless it has more than $MIN_ROWS normalized rows.
# get_sources() drops a multi-token group whose JSON-encoded token list reaches
# $MAX_LEN_SELLER_TOKENS characters.
# $QUERY_EXECUTION_TIME is passed to the YQL client as sleep_before_first_check
# (presumably seconds - TODO confirm against the YQL API model).
our $MIN_ROWS              = 9;
our $MAX_LEN_SELLER_TOKENS = 9_000;
our $QUERY_EXECUTION_TIME  = 170;

# Flat list of (OS name => OS id) pairs scanned in written order by
# extract_app_os_id(). Names are compared case-insensitively; a qr// object is
# also accepted in place of a name. It is an array, not a hash, so the scan
# order is stable.
our @OS_DICT = (
    'android'          => $OS_ID_ANDROID,
    'ios'              => $OS_ID_IOS,
    'iPhone OS'        => $OS_ID_IOS,
    'Windows Phone OS' => $OS_ID_WIN,
    'windowsphone'     => $OS_ID_WIN,
    'RIM Tablet OS'    => $OS_ID_OTHER,
    'Symbian OS'       => $OS_ID_OTHER,
    'Unknown'          => $OS_ID_OTHER,
    'webOS'            => $OS_ID_OTHER,
);

# Splits the impression's application identifier by platform.
#
# $imp - hashref with non-empty "source_app" and "source_app_os" keys.
#
# Returns the list (bundle_id, apple_id): source_app goes into bundle_id for
# Android and into apple_id for iOS; both are '' for any other OS.
# Dies when source_app is empty (and, through extract_app_os_id, when
# source_app_os is empty).
sub extract_app_ids {
    my ($self, $imp) = @_;

    my $source_app = $imp->{source_app};

    # No trailing newline on purpose: perl appends the file and line.
    die 'extract_app_ids: impression has no "source_app"' unless $source_app;

    my $os_id = $self->extract_app_os_id($imp);

    return ($source_app, '') if $os_id == $OS_ID_ANDROID;
    return ('', $source_app) if $os_id == $OS_ID_IOS;

    return ('', '');
}

# Maps the impression's "source_app_os" string to an OS id using @OS_DICT.
#
# $imp - hashref with a non-empty "source_app_os" key.
#
# Returns the OS id of the first @OS_DICT entry that matches and has a true id,
# or 0 when nothing matches (an INFO record is logged in that case).
# Dies when source_app_os is empty.
sub extract_app_os_id {
    my ($self, $imp) = @_;

    my $os_name = $imp->{source_app_os};

    # No trailing newline on purpose: perl appends the file and line.
    die 'extract_app_os_id: impression has no "source_app_os"' unless $os_name;

    my $os_name_uc = uc($os_name);
    my $found;

    # @OS_DICT is a flat list of (name-or-qr => id) pairs; walk it pairwise
    # without copying it on every call. A dangling last element is ignored.
    for (my $i = 0; $i < $#OS_DICT; $i += 2) {
        my ($pattern, $os_id) = @OS_DICT[$i, $i + 1];

        my $is_match = ref($pattern) eq 'Regexp' ? $os_name =~ $pattern : $os_name_uc eq uc($pattern);

        # An entry with a false id never ends the scan, so a later entry may still win.
        next unless $is_match && $os_id;

        $found = $os_id;
        last;
    }

    INFOF(q["Unknown mobile OS. (source_app_os="%s"; imp=%s)], $os_name, to_json($imp)) unless $found;

    return $found || 0;
}

# Aggregates stored impression-log rows dated between $date_from and $date_to
# (inclusive), grouped by application, site and seller.
#
# $date_from - defaults to the current date in 'db' format.
# $date_to   - defaults to $date_from.
#
# Returns an arrayref of row hashrefs in which "source_token" is an arrayref of
# the group's distinct tokens. Rows linked to neither an application nor a site
# are dropped. So are rows with more than one token whose JSON-encoded token
# list is $MAX_LEN_SELLER_TOKENS characters or longer; those are also logged
# as a warning.
sub get_sources {
    my ($self, $date_from, $date_to) = @_;

    $date_from //= curdate(oformat => 'db');
    $date_to //= $date_from;

    my $db = $self->partner_db;

    my %fields = (
        'source_type'    => {min          => ['source_type']},
        'source_app'     => {min          => ['source_app']},
        'source_app_os'  => {min          => ['source_app_os']},
        'source_domain'  => {min          => ['source_domain']},
        'source_token'   => {group_concat => [{distinct => ['source_token']}]},
        'hits'           => {sum          => ['hits']},
        'period_days'    => {datediff     => [\$date_to, \$date_from]},
        'seller_id'      => '',
        'application_id' => '',
        'site_id'        => '',
    );

    my $rows = $db->query->select(
        table  => $db->ssp_impression_log,
        fields => \%fields,
        filter => [AND => [[date => '>=', \$date_from], [date => '<=', \$date_to]]],
    )->group_by('application_id', 'site_id', 'seller_id')->get_all();

    my @sources;
    foreach my $row (@$rows) {
        # group_concat yields one comma-separated string per group
        my @tokens = split(',', $row->{source_token});
        $row->{source_token} = \@tokens;

        # a row must be linked to an application or to a site
        next unless $row->{application_id} || $row->{site_id};

        if (@tokens > 1 && length(to_json(\@tokens)) >= $MAX_LEN_SELLER_TOKENS) {
            WARNF(
                "SSP error. Too much (%d) tokens (\"%s\", ...) for one SSP (%d)",
                scalar(@tokens),
                $tokens[0], $row->{seller_id}
            );
            next;
        }

        push(@sources, $row);
    }

    return \@sources;
}

=head2 update_data

Fetches the BK impression log for a date (today by default), links every
record to a known SSP application or site and replaces the rows stored for
that date in C<ssp_impression_log>.

B<Arguments:> C<$date> - optional date in 'db' format.

Throws C<Exception::SSP::BadData> when the normalized log does not have more
than C<$MIN_ROWS> rows, or when the number of inserted rows differs from the
number of prepared rows.

=cut

# Sends one value of the "SSP.ImpressionLog.impressions" sensor.
# $metric is both the solomon metric name and the suffix of the graphite path.
sub _send_impressions_metric {
    my ($metric, $value) = @_;

    send_to_graphite(
        interval => 'one_hour',
        value    => $value,
        path     => "SSP.ImpressionLog.impressions_$metric",
        solomon  => {
            metric => $metric,
            sensor => 'SSP.ImpressionLog.impressions',
        }
    );

    return;
}

sub update_data {
    my ($self, $date) = @_;

    $date ||= curdate(oformat => 'db');

    _send_impressions_metric(process_age => dates_delta_days($date, curdate(oformat => 'db'), iformat => 'db'));

    my $ssp_log = $self->get_ssp_impression_log($date, $date);

    _send_impressions_metric(count_bk => scalar(@$ssp_log));

    $ssp_log = $self->_update_data_normalize($ssp_log);

    # _update_data_normalize returns nothing for a non-array input, so count
    # defensively before the value is dereferenced anywhere.
    my $rows = ref($ssp_log) eq 'ARRAY' ? scalar(@$ssp_log) : 0;

    _send_impressions_metric(count_normalized => $rows);

    throw Exception::SSP::BadData gettext('No data (date: %s, rows: %s)', $date || '-', $rows || '-')
      unless $rows > $MIN_ROWS;

    # NOTE(review): qw(id) passes the plain string 'id' as "fields"; confirm
    # that ssp_seller->get_all accepts a string and not only an arrayref.
    my %seller_ids = map {$_->{id} => 1} @{$self->ssp_seller->get_all(fields => qw(id))};

    my $_mobile_app_list = $self->_get_mobile_apps($ssp_log);

    my $content = [];
    my ($cnt_app_ok, $cnt_app_missed, $cnt_site_ok, $cnt_site_missed, $cnt_all) = (0, 0, 0, 0, 0);
    my @missed;

    my $report_progress = sub {
        _send_impressions_metric(count_processed             => scalar(@$content));
        _send_impressions_metric(count_processed_app_ok      => $cnt_app_ok);
        _send_impressions_metric(count_processed_app_missed  => $cnt_app_missed);
        _send_impressions_metric(count_processed_site_ok     => $cnt_site_ok);
        _send_impressions_metric(count_processed_site_missed => $cnt_site_missed);
    };

    my $is_debug = $self->get_option('debug', 0);

    foreach my $rec (@$ssp_log) {

        # with the "debug" option on, stop once just over 1000 records are prepared
        last if $is_debug && scalar(@$content) > 1000;

        unless ($seller_ids{$rec->{'SSPID'}}) {
            WARN {
                message => sprintf("Unknown Seller ID %s", $rec->{'SSPID'}),
                fingerprint => ['Cron', 'SSP', 'ImpressionLog', 'UnknownSeller'],
            };
            next;
        }

        my %r = (
            date            => $rec->{'UpdateTime'},
            source_type     => $rec->{'Type'},
            hits            => $rec->{'Hits'},
            seller_id       => $rec->{'SSPID'},
            page_id         => $rec->{'PageID'},
            source_token_id => $rec->{'ExportTokenMD5'},
            source_token    => $rec->{'ExportToken'},
            map {$_ => undef} qw(source_app source_app_os application_id source_domain source_domain_formats site_id)
        );

        if (in_array($r{source_type}, ['app-media', 'app-video'])) {
            $r{source_app}    = $rec->{ExportDomain};
            $r{source_app_os} = $rec->{ExportOS};
        } elsif (in_array($r{source_type}, ['media', 'video'])) {
            $r{source_domain}         = $rec->{ExportDomain};
            $r{source_domain_formats} = '';                     # not implemented (BSDEV-53798)
        } else {
            WARN {
                message     => 'Invalid record in impression log',
                extra       => {data => \%r,},
                fingerprint => ['Cron', 'SSP', 'ImpressionLog', 'InvalidRecord'],
            };
            next;
        }

        # Link the BK record to the application or site dictionary. Applications
        # were already looked up (and created when missing) by _get_mobile_apps.
        if ($r{source_app}) {
            my $os_id = $self->extract_app_os_id(\%r);

            if ($os_id == $OS_ID_ANDROID || $os_id == $OS_ID_IOS) {
                my $app_id = $_mobile_app_list->{$os_id}->{$r{source_app}};
                if ($app_id) {
                    $r{application_id} = $app_id;
                    $cnt_app_ok++;
                } else {
                    $cnt_app_missed++;
                    push @missed, $rec;
                    INFOF("Applicaion not found for impression \"%s\"", to_json(\%r));
                }
            }
        }
        if ($r{source_domain}) {
            my $site = $self->ssp_site->find_by_imp(\%r);
            if ($site) {
                $r{site_id} = $site->{id};
                $cnt_site_ok++;
            } else {
                $cnt_site_missed++;
                push @missed, $rec;
                INFOF("Site not found for impression \"%s\"", to_json(\%r));
            }
        }

        push(@$content, \%r);

        $cnt_all += 1;

        $report_progress->() if $cnt_all % 10000 == 0;
    }

    # Final totals. They cannot be triggered by comparing $cnt_all with the log
    # size, because skipped records are never counted. Skip the call only when
    # the last in-loop report already carried the same numbers.
    $report_progress->() unless $cnt_all && $cnt_all % 10000 == 0;

    INFO($date . ': ' . scalar(@missed) . ' missed impression log records');
    INFO Dumper([sort {$b->{Hits} <=> $a->{Hits}} @missed]);

    my $added;
    $self->partner_db->transaction(
        sub {
            # replace the rows of this date as a whole
            $self->partner_db->ssp_impression_log->delete($self->partner_db->filter({'date' => $date}));

            $added = $self->partner_db->ssp_impression_log->add_multi($content);

            _send_impressions_metric(count_added_db => $added);

            # scalar() is required: a bare @$content would be flattened into
            # the argument list and %d would receive a hashref.
            throw Exception::SSP::BadData gettext('Bad data (added - %d, total - %d)', $added, scalar(@$content))
              unless $added == @$content;
        }
    );
}

# Runs a YQL query over the RTB export page statistics for the given period
# and returns the rows with duplicates summed up.
#
# $from_date, $to_date - dates in 'db' format (YYYY-mm-dd); both are checked
# and an exception is thrown for a value that fails check_date.
#
# Returns an arrayref of row hashrefs sorted by the dedup key
# (UpdateTime, SSPID, ExportToken, Type, PageID). Each row gets "Hits" (the sum
# of TotalHits over its duplicates), "ExportTokenMD5", and "UpdateTime"
# converted to 'db_time' format.
sub get_ssp_impression_log {
    my ($self, $from_date, $to_date) = @_;

    my %opts = (
        startdate => $from_date,
        stopdate  => $to_date,
    );

    foreach my $name (qw(startdate stopdate)) {
        throw sprintf('Expected date format YYYY-mm-dd. got "%s"', $opts{$name} // 'undef')
          unless check_date($opts{$name}, iformat => 'db');
        $opts{$name} = trdate("db", "sec", $opts{$name});
    }

    # The two %d placeholders are the bounds of the UpdateTime range.
    my $query_template =
'SELECT th_updatetime AS UpdateTime, th_sspid AS SSPID, th_pageid AS PageID, th_exporttoken AS ExportToken,
    th_exportdomain AS ExportDomain, th_exportos AS ExportOS, th_type AS Type, TotalHits
FROM
    (
    SELECT
    th_updatetime, th_sspid, th_pageid, th_exporttoken, th_exportdomain, th_exportos, th_type, TotalHits,
    ROW_NUMBER() OVER w AS rows_count_in_window
    FROM
    (
    SELECT
    thits.UpdateTime as th_updatetime, thits.SSPID AS th_sspid, thits.PageID as th_pageid, thits.ExportToken as th_exporttoken,
    thits.ExportDomain as th_exportdomain, thits.ExportOS as th_exportos, thits.Type as th_type, SUM(thits.Hits) AS TotalHits
    FROM `home/yabs/stat/RTBExportPagesStat` AS thits
    LEFT JOIN `home/yabs/dict/SSPPageMapping` AS mapping ON (thits.SSPID = mapping.SSPID AND thits.ExportToken = mapping.PageToken)
    WHERE mapping.SSPID IS NULL
    AND UpdateTime BETWEEN %d AND %d
    AND Hits > 10
    GROUP BY thits.UpdateTime, thits.SSPID, thits.PageID, thits.ExportToken, thits.ExportDomain, thits.ExportOS, thits.Type
    ORDER BY th_updatetime, TotalHits DESC
    LIMIT 1000000
    ) AS ttotal
    WINDOW w AS (
        PARTITION BY th_sspid
        ORDER BY ttotal.TotalHits DESC
    )
)
WHERE rows_count_in_window < 26';

    my $query = sprintf($query_template, $opts{'startdate'}, $opts{'stopdate'} || $opts{'startdate'});

    my $raw_result = $self->api_yql->yql_start_operation_and_get_result(
        clusters     => $self->get_option('yql_clusters'),
        start_params => {params => {content => $query}},
        get_params   => {
            format                   => 'json',
            sleep_before_first_check => $QUERY_EXECUTION_TIME
        }
    );

    # to sum up dupes, SEE https://st.yandex-team.ru/PI-13802#5c12718484f4c9001bb9c905
    my %row_by_key;
    foreach my $row (@{from_jsonl($raw_result)}) {
        # the key is built from the raw UpdateTime, before it is reformatted below
        my $key = join('__', @{$row}{qw(UpdateTime SSPID ExportToken Type PageID)});

        if (my $seen = $row_by_key{$key}) {
            $seen->{Hits} += $row->{TotalHits};
            next;
        }

        $row_by_key{$key}      = $row;
        $row->{ExportTokenMD5} = Yandex::Utils::md5int($row->{ExportToken});
        $row->{UpdateTime}     = trdate("sec", "db_time", $row->{UpdateTime});
        $row->{Hits}           = $row->{TotalHits};
    }

    return [@row_by_key{sort keys %row_by_key}];
}

# Filters raw impression-log rows and escapes their text fields.
#
# $data - arrayref of row hashrefs; for anything else the sub returns nothing.
#
# A row is kept only when all of these hold:
#   - SSPID is true and PageID is greater than 1;
#   - Type and ExportOS consist of word characters, spaces and dashes
#     (ExportOS shorter than 128 characters);
#   - ExportOS may be empty only for the "media" and "video" types;
#   - ExportTokenMD5 is longer than 10 characters;
#   - ExportToken is 2..1023 and ExportDomain 2..254 characters long.
# ExportToken, ExportDomain and ExportOS of kept rows are passed through
# String::Escape::printable in place.
#
# Returns an arrayref of the kept rows (the same hashrefs, modified).
sub _update_data_normalize {
    my ($self, $data) = @_;

    return unless $data && ref($data) eq 'ARRAY';

    my $plain_text = qr/^[\w \-]+$/i;

    my $is_acceptable = sub {
        my ($row) = @_;

        return 0 unless $row->{SSPID};
        return 0 unless $row->{PageID} && $row->{PageID} > 1;
        return 0 unless $row->{Type} && $row->{Type} =~ $plain_text;
        return 0 unless $row->{ExportTokenMD5} && length($row->{ExportTokenMD5}) > 10;

        my $token = $row->{ExportToken};
        return 0 unless $token && length($token) > 1 && length($token) < 1024;

        my $domain = $row->{ExportDomain};
        return 0 unless $domain && length($domain) > 1 && length($domain) < 255;

        my $os = $row->{ExportOS};
        return $os
          ? ($os =~ $plain_text && length($os) < 128)
          : in_array($row->{Type}, [qw(media video)]);
    };

    my @kept;
    foreach my $row (@$data) {
        next unless $is_acceptable->($row);

        foreach my $field (qw(ExportToken ExportDomain ExportOS)) {
            $row->{$field} = String::Escape::printable($row->{$field});
        }

        push(@kept, $row);
    }

    return \@kept;
}

# Resolves application ids for every app impression ("app-media"/"app-video")
# in the log.
#
# $ssp_log - arrayref of normalized BK log rows.
#
# Applications are first looked up through ssp_application->find_by_imp_list.
# Android/iOS impressions with no match are requested via
# api_http_maas->get_apps_from_yt, and whatever comes back is stored with
# ssp_application->create_app_list. Impressions of any other OS are only logged.
#
# Returns a hashref { <os id> => { <source_app or bundle_id> => <application id> } }
# that always has the Android and iOS keys.
sub _get_mobile_apps {
    my ($self, $ssp_log) = @_;

    my %app_id_by_store = (
        $OS_ID_ANDROID => {},
        $OS_ID_IOS     => {},
    );
    my %missing_by_store = (
        $OS_ID_ANDROID => [],
        $OS_ID_IOS     => [],
    );

    # impression-shaped copies of the app records, each tagged with its OS id
    my @app_imps;
    foreach my $rec (@$ssp_log) {
        next unless in_array($rec->{Type}, ['app-media', 'app-video']);

        my $imp = {
            date            => $rec->{'UpdateTime'},
            source_type     => $rec->{'Type'},
            hits            => $rec->{'Hits'},
            seller_id       => $rec->{'SSPID'},
            page_id         => $rec->{'PageID'},
            source_token_id => $rec->{'ExportTokenMD5'},
            source_token    => $rec->{'ExportToken'},
            source_app      => $rec->{'ExportDomain'},
            source_app_os   => $rec->{'ExportOS'},
        };
        $imp->{store_id} = $self->extract_app_os_id($imp);

        push(@app_imps, $imp);
    }

    # trying to get apps from DB
    my $known_apps = $self->ssp_application->find_by_imp_list(\@app_imps);

    foreach my $imp (@app_imps) {
        my $store_id = $imp->{store_id};

        unless ($OS_ID_ANDROID == $store_id || $OS_ID_IOS == $store_id) {
            INFOF("Unsupported OS found for impression \"%s\"", to_json($imp));
            next;
        }

        my @matches = grep {_app_equals($imp, $_)} @$known_apps;

        if (@matches) {
            # app found in DB, save its id to return struct
            $app_id_by_store{$store_id}->{$imp->{source_app}} = $matches[0]->{id};
        } else {
            # app not found in DB, will try to get it from YT
            push(@{$missing_by_store{$store_id}}, $imp);
        }
    }

    # get apps from YT
    my $apps_from_yt = $self->app->api_http_maas->get_apps_from_yt(\%missing_by_store);

    # save found apps to DB
    my $created_apps = $self->ssp_application->create_app_list($apps_from_yt);

    # add application_id to return struct for newly added apps
    foreach my $created (@$created_apps) {
        $app_id_by_store{$created->{store_id}}->{$created->{bundle_id}} = $created->{application_id};
    }

    return \%app_id_by_store;
}

# Tells whether an impression refers to the given stored application.
#
# $app_from_imp - hashref with "store_id" and "source_app".
# $app_from_db  - hashref with "store_id", "bundle_id" and "apple_id".
#
# Both must belong to the same store. Android apps match on bundle_id. iOS apps
# match on apple_id (with or without an "id" prefix) or on bundle_id.
# Returns a true value on a match, a false value otherwise.
sub _app_equals {
    my ($app_from_imp, $app_from_db) = @_;

    my $store_id = $app_from_db->{store_id};
    return 0 unless $store_id == $app_from_imp->{store_id};

    my $source_app = $app_from_imp->{source_app};

    return $source_app eq $app_from_db->{bundle_id} if $OS_ID_ANDROID == $store_id;
    return 0 unless $OS_ID_IOS == $store_id;

    # source_app comes from the impression log, so it must be matched literally:
    # unescaped, a "." would match any character and an unbalanced "(" or "["
    # would abort regex compilation.
    my $apple_id = $app_from_db->{apple_id} // '';

    return 1 if $apple_id =~ /^(?:id)?\Q$source_app\E$/;
    return $source_app eq $app_from_db->{bundle_id};
}

TRUE;
