#!/usr/bin/perl
# extract fields from /var/log/msearch-proxy/access.log
# https://st.yandex-team.ru/PS-1788

use strict;
use warnings;
use Date::Parse;
use URI::Escape;

# Access-log line layout matched by the pattern below (capture numbers in
# parentheses, names taken from how the fields are used in this script):
#   (1) client ip, literal " - - ", "[" (2) timestamp (3) UTC offset "]",
#   '"' (4) method (5) request (6) protocol '"', (7) status, (8) response
#   size, (9) processing time, (10) request id, (11) host,
#   (12) upstream stats in [...] or "-", (13) found documents count.
# Compiled once; the pattern is constant for the whole run.
my $access_line_re = qr/^([^\s]+)\s-\s-\s\[([^\s]+)\s([\+-]\d+)\]\s"([^\s]+)\s([^\s]+)\s([^"]+)"\s(\d+)\s(\d+)\s(\d+)\s([^\s]+)\s([^\s]+)\s(\[[^\]]+\]|-)\s(\d+)$/;

# Read access-log lines from STDIN and print one TSKV record to STDOUT for
# every /api/async/mail/search request that carries a uid (or user)
# parameter. Lines that do not match, or whose timestamp cannot be parsed,
# are skipped silently.
while (my $raw = <STDIN>) {
    # An optional "N;N;N;" prefix is copied verbatim in front of the
    # output record; everything after it is the access-log line itself.
    my ($pos, $line) = $raw =~ /^(\d+;\d+;\d+;)(.+)$/ ? ($1, $2) : ("", $raw);

    next unless $line =~ $access_line_re;
    my ($timestamp, $timezone, $request, $docs_count) = ($2, $3, $5, $13);

    next unless $request =~ /^\/api\/async\/mail\/search/;

    # uid= takes precedence over user=; a request with neither is dropped.
    my $uid;
    if ($request =~ /[&?]uid=(\w+)/) {
        $uid = $1;
    }
    elsif ($request =~ /[&?]user=(\w+)/) {
        $uid = $1;
    }
    else {
        next;
    }

    # Pass the logged UTC offset as the zone so the result does not depend
    # on the local time zone of the machine running this script.
    my $timestamp_unix = str2time($timestamp, $timezone);
    next unless defined $timestamp_unix;

    # \w+ also admits letters and "_", so make sure the value is a plain
    # number before comparing it numerically. Anything outside the range
    # below (including non-numeric ids) goes to the regular table.
    my $is_corp = $uid =~ /\A\d+\z/
        && $uid >= 1120000000000000
        && $uid < 1130000000000000;
    my $tableName = $is_corp ? "corp_users_history" : "users_history";

    # side= wins (even when empty); otherwise the mere presence of imap=
    # marks the request as coming from imap.
    my $DeviceType = "";
    if ($request =~ /[?&]side=([^&]*)/) {
        $DeviceType = $1;
    }
    elsif ($request =~ /[?&]imap=/) {
        $DeviceType = "imap";
    }

    my $mdb           = query_param($request, 'mdb');
    my $remote_ip     = query_param($request, 'remote_ip');
    my $user_request  = query_param($request, 'user_request');
    my $text          = query_param($request, 'text');
    my $request_param = query_param($request, 'request');
    my $reqid         = query_param($request, 'reqid');

    # Only remote_ip is percent-decoded. Decoding can produce tab/CR/LF
    # (%09, %0D, %0A), which would split the TSKV record, so drop them.
    (my $remote_ip_decoded = uri_unescape($remote_ip)) =~ tr/\t\r\n//d;

    # state is a single TSKV value holding key=value pairs, so every "="
    # inside it (including the ones written here) is backslash-escaped.
    my $state = "user_request=$user_request;text=$text;request=$request_param;found=$docs_count";
    $state =~ s/=/\\=/g;

    print $pos,
        join("\t",
            'tskv',
            'tskv_format=mail-user-journal-tskv-log',
            'target=mailbox',
            "tableName=$tableName",
            "uid=$uid",
            "date=${timestamp_unix}000",    # milliseconds: seconds . "000"
            "unixtime=$timestamp_unix",
            "ip=$remote_ip_decoded",
            'module=search',
            'operation=search',
            "DeviceType=$DeviceType",
            "mdb=$mdb",
            "state=$state",
            "reqid=$reqid",
        ),
        "\n";
}

# Return the raw (still percent-encoded) value of query parameter $name in
# $request, or "" when the parameter is absent. The name must directly
# follow "?" or "&", so "text" does not match inside e.g. "xtext=".
sub query_param {
    my ($request, $name) = @_;
    return $request =~ /[?&]\Q$name\E=([^&]*)/ ? $1 : "";
}