#!/usr/bin/awk -f
# -*- Encoding: utf-8 -*-
# kate: space-indent on; indent-width 2; replace-tabs on;
#
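# Usage: gawk -f daily-report.awk checkform-access.log
#
# Requires GNU awk: the script relies on asort()/asorti(), three-argument
# match() and strftime()/systime().
#
# Input fields used:
# $5  : request method, still carrying its opening quote (e.g. "GET)
# $6  : request uri
# $7  : taken as the answer code when $8 is not numeric
# $8  : answer|error code
# $NF : request time, in seconds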

BEGIN {
    # Grand totals across all services: one per latency bucket plus the
    # number of non-200 answers.
    min100msTotal = min200msTotal = min500msTotal = 0
    min1secTotal = more1secTotal = errCodesTotal = 0

    # Pre-register the known services with zeroed counters in every
    # per-service table.
    split("ADV AUTORU CALENDAR DISK FEEDBACK FORMS_BACK_DEV FURITA GEOCHATS JOB MARKET MOIKRUG PAYMENT POSTCARD REALTY UNKNOWN XMPP YAPHOTO YARU YAUSLUGI", services, " ")
    asort(services)
    for (i in services) {
        svc = services[i]
        serviceStat[svc]  = 0
        errCodesStat[svc] = 0
        min100ms[svc]     = 0
        min200ms[svc]     = 0
        min500ms[svc]     = 0
        min1sec[svc]      = 0
        more1sec[svc]     = 0
    }
}

# Print statArray as HTML table rows "<tr><td>key</td><td>value</td></tr>",
# one row per key, ordered by key as sorted by asorti(). Entries whose key
# is the empty string are skipped.
#
# statArray : associative array to dump (not modified)
#
# keys, count and k are locals (awk's "extra parameter" idiom), so the call
# does not overwrite same-named globals such as a caller's loop index.
function print_array(statArray,    keys, count, k) {
    # asorti() clears `keys` and fills it with the sorted indices of
    # statArray, returning how many there are.
    count = asorti(statArray, keys)
    for (k = 1; k <= count; k++)
      if (keys[k] != "") {
        printf("<tr><td>%-10s</td><td>%s</td></tr>\n", keys[k], statArray[keys[k]])
      }
}

# Print one HTML table row per key of servicesStat, ordered by key as sorted
# by asorti(): key, request count, and the share of errors in percent.
#
# servicesStat : key -> number of requests
# errStat      : key -> number of failed requests (keys may be missing)
#
# Neither array is modified. A key absent from errStat is reported as 0%
# and is tested with `in`, because merely reading errStat[key] would create
# an empty entry that later shows up as a blank row when errStat is printed.
# keys, count, k, name and percent are locals.
function print_service_err(servicesStat, errStat,    keys, count, k, name, percent) {
    count = asorti(servicesStat, keys)
    for (k = 1; k <= count; k++) {
      name = keys[k]
      # Guard against division by zero for services without any request.
      if (servicesStat[name] != 0 && (name in errStat))
        percent = 100.0 * errStat[name] / servicesStat[name]
      else
        percent = 0
      printf("<tr><td>%-10s</td><td>%-10s</td><td>%g</td></tr>\n", name, servicesStat[name], percent)
    }
}

# Account one request in the global statistics tables.
#
# svc_name        : service key the request is attributed to
# http_code       : answer code; only counted when it is all digits
# timeout         : request time, compared against 0.1 / 0.2 / 0.5 / 1
# http_method_key : second key under which the same request is counted
#
# Each per-key table is incremented for both svc_name and http_method_key,
# while the *Total counters are incremented once per request.
function add_info(svc_name, http_code, timeout, http_method_key) {
    serviceStat[svc_name]++
    serviceStat[http_method_key]++

    if (http_code ~ /^[0-9]+$/) {
        code[http_code]++
        # Anything other than 200 is treated as an error.
        if (http_code != 200) {
            errCodesStat[svc_name]++
            errCodesStat[http_method_key]++
            errCodesTotal++
        }
    }

    # Latency buckets. Each branch is reached only when the previous upper
    # bound was exceeded, so the upper bound alone selects the bucket.
    if (timeout <= 0.1) {
        min100ms[svc_name]++
        min100ms[http_method_key]++
        min100msTotal++
    }
    else if (timeout <= 0.2) {
        min200ms[svc_name]++
        min200ms[http_method_key]++
        min200msTotal++
    }
    else if (timeout <= 0.5) {
        min500ms[svc_name]++
        min500ms[http_method_key]++
        min500msTotal++
    }
    else if (timeout <= 1) {
        min1sec[svc_name]++
        min1sec[http_method_key]++
        min1secTotal++
    }
    else {
        more1sec[svc_name]++
        more1sec[http_method_key]++
        more1secTotal++
        #query[timeout, svc_name] = $6
    }
}

# Main rule: account every log line except requests to /ping, /status,
# /unistat and those carrying action=allrulesreset.
$6 != "/ping" && $6 != "/status" && $6 != "/unistat" && $6 !~ /action=allrulesreset/ {
  # Service name lookup, first successful capture wins:
  #   1. so_service=NAME in the request uri
  #   2. service=NAME in the request uri
  #   3. so_service=NAME anywhere in the line
  #   4. a so_service part of an escaped multipart body (\x22 ... \x0D\x0A)
  # The condition is match() itself, so a parameter that is present but has
  # no capturable value falls through to the next source and finally to the
  # UNPARSED placeholder instead of producing an empty service name.
  # Lower-case letters are accepted for 1-3 and normalised by toupper();
  # capturing only [A-Z_] would truncate mixed-case values.
  if (match($6, /\Wso_service=([A-Za-z_]+)/, ss))
    svc_name = toupper(ss[1])
  else if (match($6, /\Wservice=([A-Za-z_]+)/, ss))
    svc_name = toupper(ss[1])
  else if (match($0, /\Wso_service=([A-Za-z_]+)/, ss))
    svc_name = toupper(ss[1])
  else if (match($0, /\Wname=\\x22so_service\\x22\\x0D\\x0A\\x0D\\x0A([A-Z_]+)\\x0D\\x0A/, ss))
    svc_name = ss[1]
  else
    svc_name = "<i>UNPARSED_SERVICE</i>"

  # The answer code is $8 when that field is numeric, otherwise $7.
  if ($8 ~ /^[0-9]+$/)
    http_code = $8
  else
    http_code = $7

  # $5 still carries the opening quote of the logged request line.
  if ($5 ~ /"POST/)
    http_method = "POST"
  else if ($5 ~ /"GET/)
    http_method = "GET"
  else
    http_method = "UNPARSED"

  add_info(svc_name, http_code, $NF, "<i>" http_method "-requests</i>")
}

# Print one report section: a heading followed by a two-column table of
# statArray (via print_array) and a "Total" row. When total is 0 only a
# heading "<title>: <emptyNote>" is printed.
#
# title       : heading text (already HTML-escaped by the caller)
# countHeader : caption of the second table column
# statArray   : key -> count table to list
# total       : grand total shown in the last row
# emptyNote   : text appended to the heading when there is nothing to list
function print_stat_section(title, countHeader, statArray, total, emptyNote) {
    if (total > 0) {
      printf("<h3>%s</h3>\n", title)
      printf("<table cellspacing='4' cellpadding='8'><thead><tr><td><b>Service</b></td><td><b>%s</b></td></tr></thead><tbody>\n", countHeader)
      print_array(statArray)
      printf("<tr><td><b>Total</b></td><td>%s</td></tr>\n", total)
      print "</tbody></table><br />"
    } else {
      printf("<h3>%s: %s</h3>\n", title, emptyNote)
    }
}

# Emit the whole report as a mail message: headers, a blank line, then an
# HTML body with the per-service, per-code, error and latency tables.
END {
    # add_info() puts every request into exactly one latency bucket, so the
    # bucket totals add up to the number of accounted requests.
    requestsTotal = min100msTotal + min200msTotal + min500msTotal + min1secTotal + more1secTotal

    # Mail headers. The charset is written as a bare token: single quotes
    # are not MIME quoting and would become part of the charset name.
    print "Content-Type: text/html; charset=utf-8"
    print "MIME-Version: 1.0"
    print "Content-Transfer-Encoding: 8bit"
    print "From: Robot Mailspam <robot-mailspam@yandex-team.ru>"
    print "To: so-report@yandex-team.ru"
    # The subject carries the date 24 hours before the moment of the run.
    printf("Subject: [%s] Checkform: Daily report\n", strftime("%Y-%m-%d", systime() - 86400))

    # The leading "\n" is the blank line that ends the header block.
    print "\n<html>\n<head>"
    print "<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />"
    print "</head>"

    print "<body>\n<h3>Requests by service</h3>"
    print "<table cellspacing='4' cellpadding='8'><thead><tr><td><b>Service</b></td><td><b>Total</b></td><td><b>Error %</b></td></tr></thead><tbody>"
    print_service_err(serviceStat, errCodesStat)
    print "</tbody></table><br />"

    print "<h3>Codes by answers</h3>"
    print "<table cellspacing='4' cellpadding='8'><thead><tr><td><b>HTTP code</b></td><td><b>Count</b></td></tr></thead><tbody>"
    print_array(code)
    print "</tbody></table><br />"

    # errCodesStat is an array and cannot be formatted with %d; the
    # "no errors" heading reports the number of accounted requests.
    print_stat_section("Errors count by service", "Errors count", errCodesStat, errCodesTotal, sprintf("no errors in %d queries", requestsTotal))

    print_stat_section("Request timeout &lt; 0.100s", "Requests count", min100ms, min100msTotal, "no requests")
    print_stat_section("Request timeout &gt; 0.100s and &lt; 0.200s", "Requests count", min200ms, min200msTotal, "no requests")
    print_stat_section("Request timeout &gt; 0.200s and &lt; 0.500s", "Requests count", min500ms, min500msTotal, "no requests")
    print_stat_section("Request timeout &gt; 0.500s and &lt; 1sec", "Requests count", min1sec, min1secTotal, "no requests")
    print_stat_section("Request timeout &gt; 1sec", "Requests count", more1sec, more1secTotal, "no requests")
#    print "<h3>Slow requests by service</h3>"
#    print "<table width=100%><thead><tr><td>Service</td><td>Requests count</td></tr>"
#    for (i in query) {
#	    split(i,sep,SUBSEP);
#	    print sep[2], sep[1], query[sep[1],sep[2]],  "\n";
#    }
#   print "</table><br />"
    print "</body>\n</html>"
}
