#!/usr/bin/perl

use strict;
use warnings;
no warnings 'uninitialized';
use utf8;
use JSON;
use Yandex::TimeCommon qw/mysql2unix/;
use Yandex::ListUtils qw/nsort/;
use List::Util qw/sum/;

# Accumulators filled while scanning the log, all keyed by the id taken
# from the file name:
#   %lines_per_sec - list of lines/second rates, one per timed file
#   %sec_per_file  - list of elapsed seconds, one per timed file
#   %start         - timestamp of the latest 'reading file' event that has
#                    not yet been paired with a 'finish'
my (%lines_per_sec, %sec_per_file, %start);

# Read log lines from the files named on the command line (or STDIN).
# Each useful line carries a date, a time and a trailing JSON object with
# at least "file" and "action"; 'finish' records also carry "lines".
LINE:
while (my $line = <>) {
    my ($date, $time, $json) = $line =~ m!(\d+\-\d+\-\d+)\s+(\d+:\d+:\d+)\s+.+?(\{.*\})$!;
    next LINE unless $json;

    # Undecodable JSON is reported and the line is skipped.
    my $record = eval { from_json($json) };
    if ($@) {
        warn $@;
        next LINE;
    }

    my $file = $record->{file};
    next LINE unless $file;

    # The id is the part of the last path component that precedes ".log".
    my ($id) = $file =~ m!.*/(.+?)\.log.*!;
    next LINE unless $id;

    my $now    = mysql2unix("$date $time");
    my $action = $record->{action};

    if ($action eq 'reading file') {
        # A start that was never finished is silently replaced by this one.
        $start{$id} = $now;
    }
    elsif ($action eq 'finish') {
        # A finish with no recorded start cannot be timed; ignore it.
        my $began = $start{$id};
        next LINE unless $began;

        # The pending start is consumed whether or not a sample is recorded.
        delete $start{$id};

        # Start and finish in the same second: no measurable duration
        # (and the rate below would divide by zero).
        next LINE if $now == $began;

        my $elapsed = $now - $began;
        push @{ $lines_per_sec{$id} }, $record->{lines} / $elapsed;
        push @{ $sec_per_file{$id} }, $elapsed;
    }
}

# Print a tab-separated report: a header row, then one row per id (sorted
# as strings) with the average and median lines-per-second, truncated to
# integers, and the average and median seconds-per-file, formatted as
# durations.
print "id\tavg_lps\tmedian_lps\tavg_spf\tmedian_spf\n";
for my $id (sort keys %sec_per_file) {
    my $rates     = $lines_per_sec{$id};
    my $durations = $sec_per_file{$id};

    my @columns = (
        $id,
        int(avg(@$rates)),
        int(median(@$rates)),
        _format_time(avg(@$durations)),
        _format_time(median(@$durations)),
    );
    print join("\t", @columns), "\n";
}

# Arithmetic mean of the numbers passed as arguments.
#
# Returns 0 when called with no arguments, so callers never divide by zero.
sub avg
{
    my $count = @_;
    return $count ? sum(@_) / $count : 0;
}

# Median of the numbers passed as arguments.
#
# Arguments:
#   @_ - numeric values, in any order.
#
# Returns:
#   - the middle value of the numerically sorted list for an odd count;
#   - the mean of the two middle values for an even count;
#   - nothing (undef in scalar context, empty list in list context) when
#     called with no arguments.
sub median
{
    return unless @_;

    # Numeric comparison is required: the default string sort would put
    # 100 before 9.
    my @sorted = sort { $a <=> $b } @_;
    my $mid    = int(@sorted / 2);

    # Odd count: a single middle element exists.
    return $sorted[$mid] if @sorted % 2;

    # Even count: the median lies halfway between the two central elements.
    return ($sorted[$mid - 1] + $sorted[$mid]) / 2;
}

# Render a duration in seconds as a compact human-readable string.
#
# Arguments:
#   $sec - duration in seconds; may be fractional (e.g. an average).
#
# Returns:
#   - "S.SSs"    (two decimals) for durations under one minute;
#   - "MMmSSs"   for durations under one hour;
#   - "HhMMmSSs" otherwise (hours are not zero-padded).
# In the minute and hour forms fractions of a second are dropped: %d
# truncates, and the % operator works on the integer parts of its operands.
sub _format_time
{
    my $sec = shift;
    if ($sec < 60) {
        return sprintf "%.2fs", $sec;
    }
    if ($sec < 3600) {
        return sprintf "%02dm%02ds", $sec/60, $sec % 60;
    }
    return sprintf "%dh%02dm%02ds", $sec/3600, ($sec%3600)/60, $sec%60;
}

