#!/usr/bin/perl -wl

use strict;

# Matches the leading "YYYY-MM-DD hh:mm:ss" timestamp of a log record.  The
# optional fractional-seconds part is captured as a second group and is
# discarded by the caller.
my $datetime_regex = qr/^\s*(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(\.\d+)?/;

# Constant TSKV header that opens every emitted record.
my $format = "tskv\ttskv_format=mail-full-log-msproxy-revisited";

# Read log lines from the files named in @ARGV (or from STDIN) and print one
# TSKV record for each line of a recognised kind.  Lines that do not look like
# a FINE/INFO record, or whose message matches none of the patterns below, are
# skipped silently.  The trailing newline of each record is supplied by the
# -l switch on the shebang line.
#
# NOTE(review): captured values are written verbatim; a tab inside a captured
# request string would break the TSKV field structure -- confirm whether the
# consumer expects escaping.
while (my $record = <>) {
    chomp $record;

    # An optional "N;N;N;" prefix is copied unchanged in front of the output.
    my ($pos, $line) = $record =~ /^(\d+;\d+;\d+;)(.+)$/
        ? ($1, $2)
        : ("", $record);

# 2014-12-10 20:15:40.723	INFO	6JS9UM	SUID 396662485 -- SIDE web -- MESSAGE_TYPE [13, 14] -- PREDICTED_ACTION DELETE -- REAL_ACTION DELETE -- PREDICTED YES -- PREDICTION IS CORRECT
# 2014-12-10 20:15:40.729	INFO	6JS9UJ	SUID 155823185 -- SIDE web -- MESSAGE_TYPE [4] -- PREDICTED_ACTION REPLY -- REAL_ACTION FORWARD -- PREDICTED YES -- PREDICTION IS WRONG
    # Split into timestamp, (ignored) fractional seconds, query id and the
    # free-form message; a failed match yields an empty list, hence `next`.
    my ($datetime, undef, $query_id, $rest)
        = $line =~ /$datetime_regex\s+(?:FINE|INFO)\s*(\w+)\s*(?:socheck:)?(.*)/
        or next;

    # Part shared by every record: position prefix, format header, timestamp
    # and query id.
    my $head = "$pos$format\ttimestamp=$datetime\tquery_id=$query_id";

    if ($rest =~ /^\s*Sending SO request:\s*(.*)/) {
        print "$head\taction=so_step1_request_sent\tso_request=$1";
    } elsif ($rest =~ /^\s*Response received, gonna parse it/) {
        print "$head\taction=so_step1_response_received";
    } elsif ($rest =~ /^\s*check result is:\s*(\w+)/) {
        my $so_status = lc $1;
        print "$head\taction=so_status_received\tso_status=$so_status";
    } elsif ($rest =~ /^\s*requesting\s*(.*)/) {
        print "$head\taction=search_request_sent\tso_request=$1";
    } elsif ($rest =~ /^\s*found\s*(\d+)\s*documents/) {
        print "$head\taction=data_found_info\tdocs_count=$1";
    } elsif ($rest =~ /^\s*request time\s*<([^>]+)>\s*:\s*(\d+)/) {
        print "$head\taction=request_time_info\tvhost=$1\tso_request_time=$2";
    } elsif ($rest =~ /^\s*Total SO request execution time\s*:\s*(\d+)/) {
        print "$head\taction=so_request_time_info\tso_request_time=$1";
    } elsif ($rest =~ m{
            ^\s* SUID                 \s+ (\d+)
            \s+--\s+ SIDE             \s+ (\w+)
            \s+--\s+ MESSAGE_TYPE     \s+ (\[[^\]]+])
            \s*--\s* PREDICTED_ACTION \s+ (\w+)
            \s*--\s* REAL_ACTION      \s+ (\w+)
            \s*--\s* PREDICTED        \s+ (\w+)
            \s+--\s+ PREDICTION[ ]IS  \s+ (\w+)
        }x) {
        # Leading whitespace is tolerated here like in every other branch, so
        # a prediction line that follows a "socheck:" prefix is not dropped.
        # All seven captured fields are emitted lower-cased.
        my ($suid, $side, $message_type, $predicted_action, $real_action,
            $predicted_status, $prediction_status)
            = map { lc $_ } ($1, $2, $3, $4, $5, $6, $7);
        print "$head\taction=so_predict_result\tsuid=$suid\tside=$side\tmessage_type=$message_type\tpredicted_action=$predicted_action\treal_action=$real_action\tpredicted_status=$predicted_status\tprediction_status=$prediction_status";
    }
}