package Utils::LogParser::Checks;

use qbit;

use Utils::LogParser;
use Utils::Logger qw(INFO);
use Utils::MonitoringUtils qw(get_pjapi send_to_graphite);
use PiConstants qw($DISK_STORAGE_DIRECTORY_PATH $LOGS_DIRECTORY_PATH);

use Scalar::Util qw(looks_like_number);
use List::Util qw(sum);

# Alternation of the top-level product names that may follow the API version
# prefix in a request path. The string is a bare "a|b|c" list, so every regex
# that interpolates it MUST wrap it in a non-capturing group: without the
# group only the first alternative stays anchored, and with a capturing group
# access_log_preparse would append the captured text to the group name.
my $products = join '|', qw(
  context search mobile video
  product user-settings financial devel
  sites users internal indoor outdoor
  suggest inviter offer site
  );

# Registry of the log files known to the parser, keyed by log name.
#
# Fields of an entry:
#   path         - log file path relative to the logs directory (parse_logs
#                  prepends the directory).
#   preparse     - optional callback ($line, $log, $stat) that turns a raw
#                  line into [status, time, field]; the check subs receive
#                  that array as their last argument.
#   groups_regex - ordered list of [group_name => [regex, ...]] pairs used by
#                  access_log_preparse. The FIRST group with a matching regex
#                  wins, so specific patterns must precede broader ones.
#   field_labels - callback ($field) returning extra key/value pairs that the
#                  *_finish subs merge into the "solomon" labels of a metric.
our $LOGS = {
    cron_internal_an_regular_update_statistics =>
      {path => "partners.yandex.ru/partner2-cron-internal_adv_net-regular_update_statistics.err",},
    cron_stat_direct_regular_update_statistics =>
      {path => "partners.yandex.ru/partner2-cron-stat_direct-regular_update_statistics.err",},
    cron_adv_net_video_regular_update_statistics_for_month =>
      {path => "partners.yandex.ru/partner2-cron-adv_net_video-regular_update_statistics_for_month.log",},
    partner2_rosetta   => {path => "partners.yandex.ru/partner2_rosetta.log",},
    kern_log           => {path => "kern.log"},
    jsonapi_access_log => {
        path         => "nginx/jsonapi.partner2.yandex.com.access-post.log",
        preparse     => \&access_log_preparse,
        groups_regex => [
            ['jsonapi_alive' => [qr"alive"]],

            # The product list has to be grouped, otherwise "^/v1/" applies to
            # the first product only and e.g. any path containing "search"
            # would be classified as "jsonapi".
            ['jsonapi' => [qr"^/v1/(?:$products)"]],

            # Catch-all: the empty pattern matches every path.
            ['jsonapi_others' => [qr""]],
        ],
        field_labels => sub {'endpoint_group' => $_[0]},
    },
    java_jsonapi_access_log => {
        path         => "nginx/java.jsonapi.partner2.yandex.access-post.log",
        preparse     => \&access_log_preparse,
        groups_regex => [
            ['java_jsonapi_users' => [qr"^/restapi/v1/users/"]],

            # Must precede the generic "^/restapi/" entry below; placed after
            # it this group could never be selected.
            ['java_enrich_page' => [qr"^/restapi/v1/api/bkdata/enrich_page"]],
            ['java_jsonapi'     => [qr"^/restapi/"]],
        ],
        field_labels => sub {'endpoint_group' => $_[0]},
    },
    access_log => {
        path         => "nginx/partner2.yandex.access-post.log",
        preparse     => \&access_log_preparse,
        groups_regex => [
            ['alive'          => [qr"^/devel/alive", qr"^/v2/alive", qr"^/v2/simple_alive"]],
            ['statistics'     => [qr"^/(?:api|v2|widget)/statistics(?!2)"]],
            ['statistics2'    => [qr"^/(?:api|v2|widget)/statistics2"]],
            ['dashboard'      => [qr"^/(?:api|v2|widget)/dashboard"]],
            ['product'        => [qr"^/v2/(?:$products)"]],
            ['master-system'  => [qr"^/master-system/"]],
            ['business-rules' => [qr"^/v2/business-rules/"]],
            ['users'          => [qr"^/restapi/v1/users/\d"]],
            ['restapi'        => [qr"^/restapi/"]],
            ['form'           => [qr"^/form/"]],
            ['intapi'         => [qr"^/intapi/"]],
            ['api'            => [qr"^/api/"]],
            ['widget'         => [qr"^/widget/"]],
            ['others'         => [qr|^/$|]],
            ['others'         => [qr/\.(?:txt|svg|xml|css|js|ico|png)$/]],

            # Catch-all: everything not matched above is counted as "others".
            ['others' => [qr||]],
        ],
        field_labels => sub {'endpoint_group' => $_[0]},
    },
    v2call_log => {
        path         => "nginx/frontend-node-v2call.log",
        preparse     => \&rosetta_log_preparse,
        # The field produced by rosetta_log_preparse is "model.method"; expose
        # both halves as separate labels.
        field_labels => sub {
            my ($field_value) = @_;
            my ($model, $method) = split /\./, $field_value;
            return (
                model          => $model,
                rosetta_method => $method,
            );
        },
    },
};

# Registry of checks, keyed by check id. parse_logs() picks every check whose
# `apply_to` matches one of the requested log names (the comparison is done by
# Utils::LogParser::_check) and passes the selection to
# Utils::LogParser::process_logs().
#
# Fields read by the code in this file:
#   name      - check name used by the *_finish subs to build metric paths and
#               the "check_name" label. Several entries set it explicitly so
#               that different logs share one metric prefix.
#               NOTE(review): entries without `name` presumably receive the
#               hash key as their name inside Utils::LogParser - confirm.
#   apply_to  - log name (string) or a regex over log names.
#   regex     - NOTE(review): presumably a filter applied to each line before
#               check_sub is called (qr// would then accept every line) -
#               confirm against Utils::LogParser.
#   check_sub - per-line callback ($log, $check, $temp_data, $line[, $preparse])
#               that accumulates into $temp_data.
#   final_sub - callback ($check, $logs, $temp_data, $results) that converts
#               the accumulated data into entries of $results.
#   juggler   - optional callback invoked by access_log_status_finish.
#   service   - suffix of the juggler service name (access_log_status_juggler).
#   crit      - share of 5xx responses at or above which the event is CRIT.
#   warnings  - share of non-2xx responses at or above which the event is WARN.
#   range     - divisor applied to request counts by access_log_rps_finish.
our $CHECKS = {
    broken_pipe => {
        apply_to  => 'partner2_rosetta',
        regex     => qr/Socket error during syswrite: Broken pipe/,
        check_sub => \&_increment,
        final_sub => \&_dump_count,
    },
    oom_killer => {
        apply_to  => 'kern_log',
        regex     => qr/Killed process/,
        check_sub => \&_increment,
        final_sub => \&_dump_count,
    },
    fix_statistics => {
        # Regex form of apply_to: covers every cron_*regular_update_statistics* log.
        apply_to  => qr/cron.*regular_update_statistics.*/,
        regex     => qr/FIX STATISTICS/,
        check_sub => \&_increment,
        final_sub => \&_dump_count,
    },
    video_pages_update_lost_statistics => {
        apply_to  => 'cron_adv_net_video_regular_update_statistics_for_month',
        regex     => qr/BAD\s+STATISTICS/,
        # Adds the "total" value found in the line to the 'view' or
        # 'open_player' accumulator, depending on which word the line contains
        # ('view' is tested first). Lines mentioning neither are ignored.
        check_sub => sub {
            my ($log, $check, $temp_data, $line) = @_;

            my $field;
            if ($line =~ /view/) {
                $field = 'view';
            } elsif ($line =~ /open_player/) {
                $field = 'open_player';
            } else {
                return FALSE;
            }

            my ($total) = $line =~ /"total":(\d+)/;

            # A line without a "total" number contributes 0.
            $temp_data->{$field} += $total // 0;
        },
        # Emits one metric per field; a field that was never seen is reported as 0.
        final_sub => sub {
            my ($check, $logs, $temp_data, $results) = @_;

            # apply_to is a plain string for this check, so it doubles as the log name.
            my $log_name = $check->{'apply_to'};

            foreach my $field (qw(view open_player)) {
                # NOTE: "statistcis" is misspelled, but it is part of the
                # emitted metric path and label, so it is kept as is here.
                $results->{"$log_name.video_pages_update_lost_statistcis__$field"} = {
                    value => $temp_data->{$field} // 0,
                    solomon => {
                        type       => 'log_parser',
                        log_name   => $log_name,
                        check_name => "video_pages_update_lost_statistcis__$field",
                    },
                  },
                  ;
            }
        },
    },

    # Response-code counters, timing percentiles and request rates for the
    # access-style logs. The jsonapi/java_jsonapi entries reuse the
    # access_log.* names through the explicit `name` field.
    'access_log.status' => {
        apply_to  => 'access_log',
        regex     => qr//,
        check_sub => \&access_log_status_check,
        final_sub => \&access_log_status_finish,
    },
    'access_log.timing' => {
        apply_to  => 'access_log',
        regex     => qr//,
        check_sub => \&access_log_timing_check,
        final_sub => \&access_log_timing_finish,
    },
    'jsonapi_access_log.status' => {
        name      => 'access_log.status',
        apply_to  => 'jsonapi_access_log',
        regex     => qr//,
        check_sub => \&access_log_status_check,
        final_sub => \&access_log_status_finish,
        juggler   => \&access_log_status_juggler,
        service   => 'jsonapi',
        crit      => .3,
        warnings  => .6,
    },
    'jsonapi_access_log.timing' => {
        name      => 'access_log.timing',
        apply_to  => 'jsonapi_access_log',
        regex     => qr//,
        check_sub => \&access_log_timing_check,
        final_sub => \&access_log_timing_finish,
    },
    'java_jsonapi_access_log.status' => {
        name      => 'access_log.status',
        apply_to  => 'java_jsonapi_access_log',
        regex     => qr//,
        check_sub => \&access_log_status_check,
        final_sub => \&access_log_status_finish,
        juggler   => \&access_log_status_juggler,
        service   => 'java_jsonapi',
        crit      => .3,
        warnings  => .6,
    },
    'java_jsonapi_access_log.timing' => {
        name      => 'access_log.timing',
        apply_to  => 'java_jsonapi_access_log',
        regex     => qr//,
        check_sub => \&access_log_timing_check,
        final_sub => \&access_log_timing_finish,
    },
    'access_log.rps' => {
        apply_to  => 'access_log',
        regex     => qr//,
        check_sub => \&access_log_rps_check,
        final_sub => \&access_log_rps_finish,
        range     => 300,
    },
    'jsonapi_access_log.rps' => {
        name      => 'access_log.rps',
        apply_to  => 'jsonapi_access_log',
        regex     => qr//,
        check_sub => \&access_log_rps_check,
        final_sub => \&access_log_rps_finish,
        range     => 300,
    },
    'java_jsonapi_access_log.rps' => {
        name      => 'access_log.rps',
        apply_to  => 'java_jsonapi_access_log',
        regex     => qr//,
        check_sub => \&access_log_rps_check,
        final_sub => \&access_log_rps_finish,
        range     => 300,
    },
    'v2call_log.status' => {
        apply_to  => 'v2call_log',
        regex     => qr//,
        check_sub => \&access_log_status_check,
        final_sub => \&access_log_status_finish,
    },
    'v2call_log.timing' => {
        apply_to  => 'v2call_log',
        regex     => qr//,
        check_sub => \&access_log_timing_check,
        final_sub => \&access_log_timing_finish,
    },
    'v2call_log.rps' => {
        apply_to  => 'v2call_log',
        regex     => qr//,
        check_sub => \&access_log_rps_check,
        final_sub => \&access_log_rps_finish,
        range     => 300,
    },
};

=head1 Для запуска на dev

  # создаем путь на локальной машине для хранения позиций
  sudo mkdir -p /var/lib/partner2
  sudo chmod a+w /var/lib/partner2
  sudo rm -f /var/lib/partner2/log_parser_state.bin

  # выводим в лог результат обработки первых 100000 строк лога
  perl -I./lib -MCron -e'Cron->new->do' monitoring logs_monitoring \
    --path=/mnt/remote-log-rfs/pi-rtb01e.yandex.ru \
    --logs=access_log \
    --log_only=1 \
    --limit=100000

  # выводим в лог результат обработки статистики и список путей, которые выбрались по указанному ключу
  perl -I./lib -MCron -e'Cron->new->do' monitoring logs_monitoring \
    --path=/home/ie2018/tmp/logs \
    --logs=jsonapi_access_log \
    --log_only=1 \
    --stat_path=jsonapi_others

  # для перезабора старой статистики
  bin/oneshots/PI-15594_send_to_graphite.pl \
    --stat=access_log \
    --from=30 \
    --root=/var/log

=cut

# Entry point of the log monitoring job: reads the saved log positions, runs
# the selected checks over the selected logs, stores the new positions and
# publishes the results.
#
# Arguments:
#   $app  - application object; used for the 'hostname' option and passed to
#           get_pjapi() when juggler events are sent.
#   %opts - command line style options:
#     --path      directory containing the logs (default $LOGS_DIRECTORY_PATH)
#     --logs      comma separated list of log names from $LOGS (default: all)
#     --limit     passed through to Utils::LogParser::process_logs as `limit`
#     --stat_path passed through as `stat_path`; when set, the third value
#                 returned by process_logs is dumped to the log
#     --log_only  when true, results and juggler events are only logged and
#                 nothing is sent to graphite/juggler
#
# Returns 1.
sub parse_logs {
    my ($app, %opts) = @_;

    my $path = $opts{'--path'} // $LOGS_DIRECTORY_PATH;

    # Resolve the log paths on per-call shallow copies of the entries.
    # Rewriting $LOGS in place would prepend the directory again on every
    # further call made in the same process.
    my %all_logs;
    for my $log_key (keys %$LOGS) {
        $all_logs{$log_key} = {%{$LOGS->{$log_key}}, path => $path . '/' . $LOGS->{$log_key}{path}};
    }

    my $state_file = "$DISK_STORAGE_DIRECTORY_PATH/log_parser_state.bin";

    INFO "Reading log positions from $state_file";

    my $positions = Utils::LogParser::read_log_positions($state_file);

    INFO "Starting sub process_logs $state_file";

    my $logs;
    if ($opts{'--logs'}) {
        my @list = split /\s*,\s*/, $opts{'--logs'};
        @{$logs}{@list} = @all_logs{@list};
    } else {
        $logs = \%all_logs;
    }

    # Select every check that applies to at least one of the chosen logs.
    my $checks;
    for my $log_key (keys %$logs) {
        for my $check (grep {Utils::LogParser::_check($CHECKS->{$_}{apply_to}, $log_key)} keys %$CHECKS) {
            $checks->{$check} = $CHECKS->{$check};
        }
    }

    my ($results, $new_positions, $stat_path) = Utils::LogParser::process_logs(
        $logs,
        $checks,
        $positions,
        limit     => $opts{'--limit'},
        stat_path => $opts{'--stat_path'},
    );

    INFO "Finished log processing. Writing new log positions to $state_file";

    Utils::LogParser::write_log_positions($state_file, $new_positions);

    INFO "Sending results to graphite";

    my $log_only = $opts{'--log_only'};

    # Juggler events travel inside $results under a reserved key (see
    # access_log_status_juggler); take them out before iterating the metrics.
    my $juggler_events = delete $results->{juggler_events};
    for my $path (keys %$results) {
        if ($log_only) {
            INFO "$path\n" . Dumper($results->{$path});
        } else {
            send_to_graphite(
                interval => 'five_min',
                path     => $path,
                value    => $results->{$path}{value},
                solomon  => $results->{$path}{solomon}
            );
        }
    }
    if ($juggler_events) {
        my %mapping = (
            0 => 'OK',
            1 => 'WARN',
            2 => 'CRIT',
        );
        my @events =
          map {
            +{
                host    => $app->get_option('hostname'),
                service => _get_juggler_service($app, $_),
                # Any level outside the mapping is reported as CRIT.
                status  => ($mapping{$juggler_events->{$_}} // 'CRIT'),
             }
          }
          sort keys %$juggler_events;
        if ($log_only) {
            INFO "$_->{service} $_->{status}" foreach @events;
        } else {
            get_pjapi($app)->send(events => \@events);
        }
    }
    if ($opts{'--stat_path'}) {
        INFO "STAT_PATH: " . Dumper($stat_path);
    }

    INFO "Done parsing logs";

    return 1;
}

# Per-line callback of the plain counting checks: bumps the counter kept in
# $temp_data under the name of the log the line belongs to and returns the
# new count.
sub _increment {
    my ($log, $check, $temp_data, $line) = @_;

    my $counter = \$temp_data->{$log->{name}};
    return $$counter += 1;
}

# Final callback of the plain counting checks: for every log that has an
# entry in $temp_data, stores "<log name>.<check name>" in $results with the
# accumulated count and the labels of the metric.
sub _dump_count {
    my ($check, $logs, $temp_data, $results) = @_;

    foreach my $counted_log (keys %{$temp_data}) {
        my $log_name = $logs->{$counted_log}{name};
        my $metric   = "$log_name.$check->{name}";

        my %labels = (
            type       => 'log_parser',
            check_name => $check->{name},
            sensor     => 'count',
        );

        $results->{$metric} = {
            value   => $temp_data->{$log_name} // 0,
            solomon => \%labels,
        };
    }
}

# Builds the juggler service name for an event name. $app is accepted for
# interface compatibility and is not used.
sub _get_juggler_service {
    my ($app, $name) = @_;

    my $prefix = 'logs_monitoring__check_';

    return $prefix . $name;
}

# Turns a tab separated access log line into [status, time, group].
#
# Column 4 holds the request line (its second space separated token is the
# URI), column 5 the status and column 11 the timing. The URI without its
# query string is matched against $log->{groups_regex}; the first matching
# regex decides the group. If that regex captured something in $1, the group
# name gets a "_<capture>" suffix.
#
# $stat is optional: when it already has a key for the matched group, the
# path is counted under that key.
#
# Returns nothing for lines that lack the needed columns or match no group.
sub access_log_preparse {
    my ($line, $log, $stat) = @_;

    $stat = {} unless defined $stat;

    my @columns = split /\t/, $line;
    my ($request, $status, $time) = @columns[4, 5, 11];
    return unless $status && $request && defined $time;

    my $uri = (split / /, $request)[1];
    return unless $uri;

    my $path = $uri;
    $path =~ s/\?.*$//;

    foreach my $group (@{$log->{groups_regex}}) {
        my $group_name = $group->[0];

        foreach my $pattern (@{$group->[1]}) {
            next unless $path =~ $pattern;

            # $1 belongs to the match that has just succeeded; it is undef
            # when the pattern has no capture group.
            my $suffix = defined $1 ? "_$1" : '';

            $stat->{$group_name}{$path}++ if exists $stat->{$group_name};

            return [$status, $time, $group_name . $suffix];
        }
    }

    return;
}

# Per-line callback of the status checks: counts the line in
# $temp_data->{<log name>}{<status>}{<field>}. $preparse is the
# [status, time, field] array built by the log's preparse callback.
sub access_log_status_check {
    my ($log, $check, $temp_data, $line, $preparse) = @_;

    my ($status, undef, $field) = @{$preparse};

    # Lines without a field are not counted.
    return $field unless $field;

    return $temp_data->{$log->{name}}{$status}{$field}++;
}

# Final callback of the status checks: converts the
# {<log name>}{<status>}{<field>} counters into "<check>.<field>.<status>"
# entries of $results and then runs the optional $check->{juggler} callback.
sub access_log_status_finish {
    my ($check, $logs, $temp_data, $results) = @_;

    my $check_name = $check->{name};

    foreach my $log_name (keys %{$temp_data}) {
        foreach my $status (keys %{$temp_data->{$log_name}}) {
            my $counts = $temp_data->{$log_name}{$status};

            foreach my $field (keys %{$counts}) {
                # Log specific labels go first, so the fixed labels below
                # take precedence on a key clash.
                my %labels = (
                    $logs->{$log_name}{field_labels}->($field),

                    type       => 'log_parser',
                    check_name => $check_name,
                    log_name   => $log_name,
                    http_code  => $status,
                    sensor     => 'count',
                );

                $results->{"$check_name.$field.$status"} = {
                    value   => 0 + $counts->{$field},
                    solomon => \%labels,
                };
            }
        }
    }

    my $juggler_sub = $check->{juggler};
    $juggler_sub->($check, $temp_data, $results) if $juggler_sub;
}

# Derives a juggler event level from the accumulated status counters and
# stores it in $results->{juggler_events}{"status__<service>"}, where
# <service> is $check->{service} or, failing that, $check->{name}.
#
# $temp_data has the layout produced by access_log_status_check:
#   {<log name>}{<http status>}{<field>} = number of requests
#
# Level (parse_logs maps it to OK/WARN/CRIT):
#   2 - share of 5xx responses      >= $check->{crit}     (default .5)
#   1 - share of non-2xx responses  >= $check->{warnings} (default .5)
#   0 - otherwise, or when there were no requests at all
sub access_log_status_juggler {
    my ($check, $temp_data, $results) = @_;

    my $service_name = 'status__' . ($check->{service} // $check->{name});
    my $juggler_events = $results->{juggler_events} //= {};

    # Request totals: [0] 2xx, [1] neither 2xx nor 5xx, [2] 5xx.
    my @summ = (0, 0, 0);

    # The top level of $temp_data is keyed by log name, so the statuses have
    # to be collected one level below it.
    foreach my $by_status (values %$temp_data) {
        foreach my $status (keys %$by_status) {
            my $index = (
                $status =~ /^2/ ? 0
                : (
                    $status =~ /^5/ ? 2
                    : 1
                  )
            );
            $summ[$index] += $_ foreach (values %{$by_status->{$status}});
        }
    }

    my $crit_summ = $summ[2];
    my $warn_summ = $summ[1] + $crit_summ;
    my $all_summ  = $summ[0] + $warn_summ;
    if ($all_summ > 0) {
        my $crit_limit = $check->{crit}     // .5;
        my $warn_limit = $check->{warnings} // .5;
        $juggler_events->{$service_name} = (
            $crit_summ / $all_summ >= $crit_limit ? 2
            : (
                $warn_summ / $all_summ >= $warn_limit ? 1
                : 0
              )
        );
    } else {
        $juggler_events->{$service_name} = 0;
    }
}

# Per-line callback of the timing checks: appends the request time to the
# list kept in $temp_data->{<log name>}{<field>}. $preparse is the
# [status, time, field] array built by the log's preparse callback.
sub access_log_timing_check {
    my ($log, $check, $temp_data, $line, $preparse) = @_;

    my (undef, $time, $field) = @{$preparse};

    # Lines without a field are not recorded.
    return $field unless $field;

    return push @{$temp_data->{$log->{name}}{$field}}, $time;
}

# Final callback of the timing checks: for every log and field stores the
# 50/60/90/95/98/99/100th percentiles of the collected request times in
# $results as "<check>.<field>.<percentile>".
#
# Only values that look like numbers take part. The percentile is the element
# of the sorted list at index int(percent / 100 * last_index). When a field
# has no numeric timings at all, the percentiles are reported as 0.
sub access_log_timing_finish {
    my ($check, $logs, $temp_data, $results) = @_;

    my $check_name = $check->{name};
    for my $log_name (keys(%$temp_data)) {
        for my $field (keys %{$temp_data->{$log_name}}) {
            my @ary = sort {$a <=> $b} grep {looks_like_number($_)} @{$temp_data->{$log_name}{$field}};

            for my $part (qw(50 60 90 95 98 99 100)) {
                # Guard the empty list explicitly: indexing it yields undef,
                # and adding undef triggers an "uninitialized value" warning.
                my $value = @ary ? 0 + $ary[int($part / 100 * $#ary)] : 0;

                $results->{"$check_name.$field.$part"} = {
                    value   => $value,
                    solomon => {
                        $logs->{$log_name}{field_labels}->($field),

                        type       => 'log_parser',
                        check_name => $check_name,
                        log_name   => $log_name,
                        percentile => $part,
                        sensor     => 'request_time',
                    },
                };
            }
        }
    }
}

# Per-line callback of the rps checks: counts the line in
# $temp_data->{<log name>}{<field>}. $preparse is the [status, time, field]
# array built by the log's preparse callback.
sub access_log_rps_check {
    my ($log, $check, $temp_data, $line, $preparse) = @_;

    my (undef, undef, $field) = @{$preparse};

    # Lines without a field are not counted.
    return $field unless $field;

    return $temp_data->{$log->{name}}{$field}++;
}

# Final callback of the rps checks: divides every per-field request count by
# $check->{range} and stores the rate in $results as "<check>.<field>".
sub access_log_rps_finish {
    my ($check, $logs, $temp_data, $results) = @_;

    my $range      = $check->{range};
    my $check_name = $check->{name};

    foreach my $log_name (keys %{$temp_data}) {
        my $counts = $temp_data->{$log_name};

        foreach my $field (keys %{$temp_data->{$log_name}}) {
            my $rate = 0 + $counts->{$field} / $range;

            # Log specific labels go first, so the fixed labels below take
            # precedence on a key clash.
            my %labels = (
                $logs->{$log_name}{field_labels}->($field),

                type       => 'log_parser',
                check_name => $check_name,
                log_name   => $log_name,
                sensor     => 'rps',
            );

            $results->{"$check_name.$field"} = {
                value   => $rate,
                solomon => \%labels,
            };
        }
    }
}

# Turns a tab separated v2call log line into [code, time, "model.method"].
#
# Columns 5, 6 and 8 are read as the request, the response code and the
# timing. The request, stripped of double quotes, must consist of exactly two
# non-empty dot separated parts. Trailing whitespace is removed from the
# timing. $log is accepted for interface compatibility and is not used.
#
# Returns nothing for lines that do not fit this layout.
sub rosetta_log_preparse {
    my ($line, $log) = @_;

    # The explicit limit keeps empty trailing columns, exactly as assigning
    # the split result to a list of nine scalars does.
    my ($request, $code, $time) = (split /\t/, $line, 10)[5, 6, 8];
    return unless $request and defined $code and defined $time;

    $request =~ tr/"//d;

    my @parts = split /\./, $request;
    return unless @parts == 2 and $parts[0] and $parts[1];

    $time =~ s/\s+$//;

    return [$code, $time, join('.', @parts)];
}

1;
