#!/usr/bin/perl

use Digest::MD5 qw(md5 md5_hex md5_base64);

# Maps a detected MIME content type to the file extension used as the row
# label in the conversion report. Content types missing from this table are
# reported under the raw MIME string instead.
#
# Each key appears exactly once. Earlier revisions listed some types twice
# (e.g. image/jpeg as both ".jpg" and ".jpeg"); Perl keeps only the LAST
# value for a repeated key, so the shadowed entries never had any effect.
# The values below are the ones that were actually in effect.
our %exts = (
    "video/3gpp" => ".3g2",    # NOTE(review): ".3g2" is usually video/3gpp2 - confirm
    "application/x-7z-compressed" => ".7z",
    "audio/aac" => ".aac",
    "application/cdr" => ".cdr",
    "application/x-chm" => ".chm",
    "application/com" => ".com",
    "image/x-canon-cr2" => ".cr2",
    "text/csv" => ".csv",
    "application/octet-stream" => ".db",
    "application/x-apple-diskimage" => ".dmg",
    "application/x-msdownload" => ".exe",
    "application/x-fla" => ".fla",
    "video/x-flv" => ".flv",
    "text/calendar" => ".ics",
    "application/x-java-jnlp-file" => ".jnlp",
    "application/x-iwork-keynote-sffkey" => ".key",
    "video/x-m4v" => ".m4v",
    "video/x-matroska" => ".mkv",
    "audio/mpeg" => ".mp3",
    "video/mp4" => ".mp4",
    "application/x-msi" => ".msi",
    "image/x-nikon-nef" => ".nef",
    "application/x-iwork-numbers" => ".numbers",
    "application/ogg" => ".ogg",
    "application/x-iwork-pages-sffpages" => ".pages",
    "text/php" => ".php",
    "audio/x-scpls" => ".pls",
    "image/vnd.adobe.photoshop" => ".psd",
    "application/x-rar" => ".rar",
    "application/vnd.rn-realmedia-vbr" => ".rmvb",
    "application/x-rpm" => ".rpm",
    "application/x-bzip" => ".tbz",
    "image/x-tga" => ".tga",
    "font/ttf" => ".ttf",
    "video/x-ms-wmv" => ".wmv",

    # Office documents and related formats
    "application/msword" => ".doc",
    "application/rtf" => ".rtf",
    "application/x-shockwave-flash" => ".swf",
    "image/tiff" => ".tiff",
    "image/vnd.djvu" => ".djvu",
    "application/vnd.oasis.opendocument.text" => ".odt",
    "application/vnd.oasis.opendocument.spreadsheet" => ".ods",
    "application/postscript" => ".ps",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => ".docx",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.template" => ".dotx",
    "application/vnd.ms-word.document.macroEnabled.12" => ".docm",
    "application/vnd.ms-word.template.macroEnabled.12" => ".dotm",
    "application/vnd.ms-excel" => ".xls",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => ".xlsx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.template" => ".xltx",
    "application/vnd.ms-excel.sheet.macroEnabled.12" => ".xlsm",
    "application/vnd.ms-excel.template.macroEnabled.12" => ".xltm",
    "application/vnd.ms-excel.addin.macroEnabled.12" => ".xlam",
    "application/vnd.ms-excel.sheet.binary.macroEnabled.12" => ".xlsb",
    "application/vnd.ms-powerpoint" => ".ppt",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation" => ".pptx",
    "application/vnd.openxmlformats-officedocument.presentationml.template" => ".potx",
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow" => ".ppsx",
    "application/vnd.ms-powerpoint.addin.macroEnabled.12" => ".ppam",
    "application/vnd.ms-powerpoint.presentation.macroEnabled.12" => ".pptm",
    "application/vnd.ms-powerpoint.template.macroEnabled.12" => ".potm",
    "application/vnd.ms-powerpoint.slideshow.macroEnabled.12" => ".ppsm",

    # Plain text, archives and common images
    "text/plain" => ".txt",
    "application/pdf" => ".pdf",
    "application/x-gzip" => ".gz",      # previously also listed as ".tgz" (shadowed)
    "application/x-bzip2" => ".bz2",    # previously also listed as ".tbz2" (shadowed)
    "application/x-tar" => ".tar",
    "application/zip" => ".zip",
    "image/jpeg" => ".jpeg",            # previously also listed as ".jpg" (shadowed)
    "image/gif" => ".gif",
    "image/bmp" => ".bmp",
    "image/png" => ".png",
);




# ---------------------------------------------------------------------------
# Log parsing
#
# Reads log lines from STDIN and accumulates statistics in the package
# variables declared below; the reporting code further down prints them.
# Each line is classified by process_line(); lines matching no pattern are
# ignored.
# ---------------------------------------------------------------------------

our %exts;                      # MIME type => extension; filled at the top of the file

our %requests  = ();            # req-id => detected content type of the scheduled conversion
our %converted = ();            # content type => number of stored (successful) conversions
our %converted_failed;          # content type => number of failed conversions
our %converted_success_exts;    # extension (or raw type) => successful conversions
our %converted_failed_exts;     # extension (or raw type) => failed conversions
our %convert_failed_types;      # failure reason => count
our %copy_failed_types;         # failure reason => count
our %converters;                # converter name => { time bucket => count }
our %htmlimage_success_arr;     # md5 of the id parameter => pages served with HTTP 200
our %htmlimage_failed_arr;      # md5 of the id parameter => pages answered with HTTP 500
our %htmlimage_time;            # time bucket => count of HTTP 200 page requests

# Counters start at 0 so that the report prints "0" rather than an empty
# string for an event that never occurred.
our ($convert_cached, $convert_success, $convert_failed) = (0, 0, 0);
our ($copy_success, $copy_failed)                       = (0, 0);
our ($htmlimage_success, $htmlimage_failed)             = (0, 0);

# Failure reasons, tried in order; the first matching pattern wins.
my @copy_failure_kinds = (
    [ qr/FILE_TOO_BIG/,                         "FILE_TOO_BIG" ],
    [ qr/ARCHIVE_TOO_BIG/,                      "ARCHIVE_TOO_BIG" ],
    [ qr/stid couldn't be found in response/,   "NO_STID" ],
    [ qr/FILE_NOT_FOUND/,                       "FILE_NOT_FOUND" ],
    [ qr/FILE_IS_EMPTY/,                        "FILE_IS_EMPTY" ],
);
my @convert_failure_kinds = (
    [ qr/UNSUPPORTED_SOURCE_TYPE/,      "UNSUPPORTED_SOURCE_TYPE" ],
    [ qr/CONVERT_TIMEOUT/,              "CONVERT_TIMEOUT" ],
    [ qr/FILE_IS_PASSWORD_PROTECTED/,   "FILE_IS_PASSWORD_PROTECTED" ],
);

# Upper bounds (exclusive, in seconds) of the duration buckets; anything at
# or above the last bound falls into "20xinf".
my @convert_time_buckets   = ( [ 5, "0x5" ], [ 20, "5x20" ] );
my @htmlimage_time_buckets = ( [ 0.2, "0x0.2" ], [ 1, "0.2x1" ], [ 20, "1x20" ] );

# Returns the label of the first pattern in $kinds (array ref of
# [regex, label] pairs) that matches $line, or "OTHER AND UNKNOWN".
sub _classify_failure {
    my ($line, $kinds) = @_;
    foreach my $kind (@$kinds) {
        my ($pattern, $label) = @$kind;
        return $label if $line =~ $pattern;
    }
    return "OTHER AND UNKNOWN";
}

# Returns the bucket label for $seconds given $buckets, an array ref of
# [upper bound, label] pairs in ascending order.
sub _time_bucket {
    my ($seconds, $buckets) = @_;
    foreach my $bucket (@$buckets) {
        my ($upper_bound, $label) = @$bucket;
        return $label if $seconds < $upper_bound;
    }
    return "20xinf";
}

# Returns the content type recorded for $req_id, or "unknown" when no
# schedule-convert line was seen for it (e.g. the log starts mid-request).
# Without this fallback such events were tallied under an empty-string key.
sub _content_type_of {
    my ($req_id) = @_;
    return defined $requests{$req_id} ? $requests{$req_id} : "unknown";
}

# Returns the report label for a content type: its extension from %exts when
# one is known, otherwise the content type itself.
sub _report_label {
    my ($type) = @_;
    return defined $exts{$type} ? $exts{$type} : $type;
}

# Classifies one chomped log line and updates the statistics above.
# Patterns are tried in a fixed order and only the first match is counted.
sub process_line {
    my ($line) = @_;

    # [^"]* instead of a greedy .* so that quoted fields following
    # detected-content-type do not leak into the captured content type.
    if ($line =~ /req-id=([a-zA-Z0-9]+), stage=schedule-convert, .* success=true, status="Convert scheduled for \[HTML_WITH_IMAGES\].", .* detected-content-type="([^"]*)"/) {
        $requests{$1} = $2;
    } elsif ($line =~ /req-id=([a-zA-Z0-9]+), stage=schedule-convert, .* success=true, status="Types \[HTML_WITH_IMAGES\] done already."/) {
        $convert_cached++;
    } elsif ($line =~ /req-id=([a-zA-Z0-9]+), stage=store-result, .* success=true/) {
        my $type = _content_type_of($1);
        $convert_success++;
        $converted{$type}++;
        $converted_success_exts{ _report_label($type) }++;
    } elsif ($line =~ /stage=copy.* success=true/) {
        $copy_success++;
    } elsif ($line =~ /stage=copy.* success=false/) {
        $copy_failed++;
        $copy_failed_types{ _classify_failure($line, \@copy_failure_kinds) }++;
    } elsif ($line =~ /stage=convert, duration=(\d+\.\d+), success=true, .* converter=(.*)/) {
        my ($duration, $converter) = ($1, $2);
        $converters{$converter}{ _time_bucket($duration, \@convert_time_buckets) }++;
    } elsif ($line =~ /req-id=([a-zA-Z0-9]+), stage=convert, .* success=false/) {
        my $type = _content_type_of($1);
        $convert_failed++;
        $converted_failed{$type}++;
        $converted_failed_exts{ _report_label($type) }++;
        $convert_failed_types{ _classify_failure($line, \@convert_failure_kinds) }++;
    } elsif ($line =~ /GET \/htmlimage.*id=(.*)\&width.* HTTP\/1\.0" (\d+).* (\d+\.\d+)/) {
        # The dots in "1.0" and in the trailing request time are escaped so
        # that e.g. "1234 5" can no longer be mistaken for a decimal time.
        my ($doc_id, $status, $seconds) = ($1, $2, $3);
        if ($status == 200) {
            $htmlimage_success++;
            $htmlimage_success_arr{ md5_hex($doc_id) }++;
            $htmlimage_time{ _time_bucket($seconds, \@htmlimage_time_buckets) }++;
        } elsif ($status == 500) {
            # Only HTTP 500 counts as a display error; other codes are ignored.
            $htmlimage_failed++;
            $htmlimage_failed_arr{ md5_hex($doc_id) }++;
        }
    }
    return;
}

while (my $line = <STDIN>) {
    chomp($line);
    process_line($line);
}

# ---------------------------------------------------------------------------
# Report
#
# Prints the statistics gathered while reading the log. The sections are:
# cache hits, rendered pages/documents (htmlimage requests), conversion
# results and copy results. Every percentage goes through _percent() so
# that an empty category prints "0.00" instead of aborting the script with
# "Illegal division by zero", and every counter goes through _count() so
# that a never-incremented value prints "0" instead of an empty string.
# ---------------------------------------------------------------------------

our ($convert_cached, $convert_success, $convert_failed);
our ($copy_success, $copy_failed);
our ($htmlimage_success, $htmlimage_failed);
our (%htmlimage_success_arr, %htmlimage_failed_arr, %htmlimage_time);
our (%converted_success_exts, %converted_failed_exts);
our (%convert_failed_types, %copy_failed_types, %converters);

# Returns $value, or 0 when $value is undefined.
sub _count {
    my ($value) = @_;
    return defined $value ? $value : 0;
}

# Returns $part as a percentage of $total, formatted with two decimals.
# Returns "0.00" when $total is 0 or undefined.
sub _percent {
    my ($part, $total) = @_;
    $part  = _count($part);
    $total = _count($total);
    return "0.00" if $total == 0;
    return sprintf("%.2f", 100 / $total * $part);
}

# Prints one "count: label" line per key of the hash referenced by $counts,
# largest count first.
sub _print_breakdown {
    my ($counts) = @_;
    foreach my $label (sort { $counts->{$b} <=> $counts->{$a} } keys %$counts) {
        print "$counts->{$label}: $label\n";
    }
    return;
}

{
    print "попаданий в кеш: " . _count($convert_cached) . "\n";

    print "\n======\n\n";

    # --- rendered pages: one count per matching htmlimage request ---
    my $pages_ok     = _count($htmlimage_success);
    my $pages_failed = _count($htmlimage_failed);
    my $pages_total  = $pages_ok + $pages_failed;

    print "всего показано pdf-страниц: " . $pages_total . "\n";
    print "успешных отображений pdf-страниц: $pages_ok (" . _percent($pages_ok, $pages_total) . "%)\n";
    print "ошибок отображения pdf-страниц: $pages_failed (" . _percent($pages_failed, $pages_total) . "%)\n";

    print "\n";

    # --- rendered documents: distinct ids; an id that was seen with both
    # outcomes is counted on both sides ---
    my $docs_ok     = scalar(keys %htmlimage_success_arr);
    my $docs_failed = scalar(keys %htmlimage_failed_arr);
    my $docs_total  = $docs_ok + $docs_failed;

    print "всего показано pdf-документов: " . $docs_total . "\n";
    print "успешных отображений pdf-документов: " . $docs_ok . " (" . _percent($docs_ok, $docs_total) . "%)\n";
    print "ошибок отображения pdf-документов: " . $docs_failed . " (" . _percent($docs_failed, $docs_total) . "%)\n";

    print "\n";

    print "распределение времени успешных показов pdf-страниц:\n\n";

    foreach my $bucket ("0x0.2", "0.2x1", "1x20", "20xinf") {
        print "- $bucket: " . _count($htmlimage_time{$bucket}) . "\n";
    }

    print "\n";

    print "\n======\n\n";

    # --- conversions ---
    my $conv_ok     = _count($convert_success);
    my $conv_failed = _count($convert_failed);
    my $conv_total  = $conv_ok + $conv_failed;

    print "успешная конвертация: $conv_ok (" . _percent($conv_ok, $conv_total) . "%)\n";
    print "ошибок конвертации: $conv_failed (" . _percent($conv_failed, $conv_total) . "%)\n";

    print "\n";

    # Only extensions with at least one failure are listed, most failures first.
    print "сводная таблица по конвертации:\n";
    print "ошибок, удач, %ошибок, расширение файла\n";
    foreach my $ext (sort { $converted_failed_exts{$b} <=> $converted_failed_exts{$a} } keys %converted_failed_exts) {
        my $failed    = $converted_failed_exts{$ext};
        my $succeeded = _count($converted_success_exts{$ext});
        print "$failed, $succeeded, " . _percent($failed, $failed + $succeeded) . "%, $ext\n";
    }

    print "\n";

    print "ошибок конвертации по типам:\n";
    _print_breakdown(\%convert_failed_types);

    print "\n";

    print "\n";

    print "распределение по времени работы каждого конвертера:\n\n";
    foreach my $converter (sort keys %converters) {
        print "${converter}:\n";
        foreach my $bucket ("0x5", "5x20", "20xinf") {
            print "- $bucket: " . _count($converters{$converter}{$bucket}) . "\n";
        }
    }
    print "\n";

    print "\n======\n\n";

    # --- copies ---
    my $copies_ok     = _count($copy_success);
    my $copies_failed = _count($copy_failed);
    my $copies_total  = $copies_ok + $copies_failed;

    print "всего копирований: " . $copies_total . "\n";
    print "успех копирования: " . $copies_ok . " (" . _percent($copies_ok, $copies_total) . "%)\n";
    print "ошибок копирования: " . $copies_failed . " (" . _percent($copies_failed, $copies_total) . "%)\n";

    print "\n";

    print "ошибок копирования по типам:\n";
    _print_breakdown(\%copy_failed_types);
}




