#!/usr/bin/perl
# extract fields from /var/log/msearch-proxy/access.log
# st/MAILSRCH-2

use strict;
use warnings;

# Map a request path from the log onto the path reported in the output.
#
#   $method  - HTTP method exactly as logged (compared case-insensitively)
#   $request - request path (with optional query string) as logged
#
# Returns the rewritten path, or $request unchanged when no rule applies.
sub _rewrite_request {
    my ($method, $request) = @_;

    # Root requests ("/" alone or "/?query") are reported under a
    # method-specific path; the query string, if any, is carried over.
    if ($request =~ m{^/((?:\?|$).*)}) {
        my $query = $1;
        my $verb  = lc $method;
        return '/api/mail_old/search' . $query if $verb eq 'get';
        return '/api/mail_old/delete' . $query if $verb eq 'post';

        # Any other method on the root path is reported as logged.
        return $request;
    }

    # "/api/chemodan_<name><rest>" is reported as "/api/attaches/<name><rest>".
    # NOTE(review): this pattern is not anchored, so anything preceding
    # "/api/chemodan_" is dropped and a match inside a query string also
    # triggers the rewrite -- confirm that this is intended.
    if ($request =~ m{/api/chemodan_([^?/]+)(.*)}) {
        return "/api/attaches/$1" . $2;
    }

    return $request;
}

# Convert one chomped access-log line into one TSKV record.
#
#   $raw - input line without the trailing newline; it may start with a
#          "<digits>;<digits>;<digits>;" prefix, which is copied verbatim
#          to the front of the output record
#
# Returns the complete output record (including the trailing newline),
# or undef when the line does not match the expected access-log layout.
sub _convert_line {
    my ($raw) = @_;

    # Separate the optional numeric prefix from the log entry itself.
    my ($pos, $line) = $raw =~ /^(\d+;\d+;\d+;)(.+)$/;
    ($pos, $line) = ('', $raw) unless defined $pos;

    # Fields that are parsed but never emitted are matched without a
    # capture group, so the list below holds only what gets printed.
    my (
        $ip,     $timestamp,     $timezone,
        $method, $request,       $protocol,
        $status, $response_size, $processing_millitime,
        $request_identifier,     $docs_count,
    ) = $line =~ m{
        ^ (\S+)                             # client ip
        \s-\s \S+                           # second identity field (not emitted)
        \s \[ (\S+) \s ([\+-]\d+) \]        # timestamp and timezone offset
        \s " (\S+) \s (\S+) \s ([^"]+) "    # method, request, protocol
        \s (\d+)                            # status
        \s (\d+)                            # response size
        \s (\d+)                            # processing time
        \s (\S+)                            # request identifier
        \s \S+                              # host field (not emitted)
        \s (?: \[\[ .* \]\(\d+\)\] | - )    # upstream stats or "-" (not emitted)
        \s? (\d*) $                         # optional trailing docs count
    }x
        or return;

    # An absent (empty) docs count is reported as 0.
    $docs_count ||= 0;

    $request = _rewrite_request($method, $request);

    return $pos . join(
        "\t",
        'tskv',
        'tskv_format=access-log-msproxy',
        'vhost=msearch-proxy.mail.yandex.net',
        "ip=$ip",
        "timestamp=$timestamp",
        "timezone=$timezone",
        "method=$method",
        "request=$request",
        "protocol=$protocol",
        "status=$status",
        "response_size=$response_size",
        "processing_millitime=$processing_millitime",
        "docs_count=$docs_count",
        "request_identifier=$request_identifier",
    ) . "\n";
}

# Main loop: filter STDIN to STDOUT, silently skipping lines that do not
# parse as access-log entries.
while (my $raw = <STDIN>) {
    chomp $raw;
    my $record = _convert_line($raw);
    next unless defined $record;
    print $record;
}