#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, re, email.message, subprocess
sys.path.insert(0, '/opt/precast-sosearchlog-parser')
from datetime import date, timedelta
from collections import defaultdict
from socket import gethostname
from common import CFG, get_traceback, sendEmail

# Input logs live under a per-host directory; the ".1" suffix is presumably
# the previous day's rotated file (verify against the logrotate setup).
SENDANSWER_LOGFILE = "/u0/%s/usr/local/www/logs/SO_MSEARCHPROXY/sendanswer.log.1" % gethostname()
NGINX_LOGFILE = "/u0/%s/usr/local/www/logs/SO_MSEARCHPROXY/nginx/access-msearchproxy.log.1" % gethostname()

# The report covers yesterday (ISO format, YYYY-MM-DD).
DATE = (date.today() - timedelta(days=1)).isoformat()

# stat:  counter name -> accumulated integer, filled from the sendanswer log.
stat = defaultdict(int)
# nginx: host -> counter name -> count, filled from the nginx access log.
nginx = defaultdict(lambda: defaultdict(int))
# Scan the sendanswer log and accumulate into `stat`:
#   * 'spam' / 'ham' / 'spam_total' - verdict counters taken from the
#     '"spam":true|false' marker;
#   * 'FULLCHK' - sum of the number that precedes "MSEARCH-PROXY";
#   * one counter per "NAME=value" item listed after "MSEARCH-PROXY -"
#     (items without '=' count as 1);
#   * 'TOTAL' - number of lines that carried such a timing record.
spam_re = re.compile(r'"spam":(true|false)')
timing_re = re.compile(r'(\d+)\s*:\s*MSEARCH-PROXY\s*\-\s*(.+),\s*\)')

# `with` guarantees the log is closed even if parsing fails half-way.
with open(SENDANSWER_LOGFILE) as sendanswer_log:
    for line in sendanswer_log:
        verdict = spam_re.search(line)
        if verdict:
            stat['spam' if verdict.group(1) == 'true' else 'ham'] += 1
            stat['spam_total'] += 1

        timing = timing_re.search(line)
        if not timing:
            continue

        stat['FULLCHK'] += int(timing.group(1))
        for item in timing.group(2).split(','):
            p = item.strip().split('=')
            if len(p) < 2:
                # Bare flag without a value: just count its occurrence.
                stat[p[0]] += 1
                continue
            try:
                stat[p[0]] += int(p[1])
            except ValueError:
                # One malformed metric must not abort the whole daily report;
                # report it the same way broken nginx rows are reported.
                sys.stderr.write("Wrong metric %r in log row: %s\n" % (item, line))
        stat['TOTAL'] += 1
def _parse_seconds(field):
    """Return a log timing field as float seconds, 0.0 when it is not numeric.

    The previous ``field.isdigit()`` test rejected every decimal value such
    as "0.003", so all timings were silently read as 0.
    """
    try:
        return float(field)
    except ValueError:
        return 0.0


def _status_bucket(code):
    """Map an HTTP status code to the counter key used in the report."""
    # 400, 404 and 499 are tracked individually, everything else by class.
    if code in (400, 404, 499):
        return str(code)
    for upper_bound, key in ((200, '1xx'), (300, '2xx'), (400, '3xx'),
                             (500, '4xx'), (600, '5xx')):
        if code < upper_bound:
            return key
    return 'unknown'


def _time_bucket(request_time, upstream_response_time):
    """Map the request timings to one of the answer-time counter keys.

    Boundaries follow the bucket keys and the report labels
    (0-0.03, 0.03-0.1, 0.1-0.5, 0.5-1, 1-inf); the former 0.003 / 0.01
    limits contradicted both.
    NOTE(review): the first bucket is decided by request_time and the others
    by upstream_response_time, as in the original code - confirm this mix
    is intended.
    """
    if request_time <= 0.03:
        return '0x003'
    if upstream_response_time <= 0.1:
        return '003x01'
    if upstream_response_time <= 0.5:
        return '01x05'
    if upstream_response_time <= 1:
        return '05x1'
    return '1xinf'


# Scan the nginx access log and fill `nginx[host]` with per-host counters:
# 'total', one status-code bucket and one answer-time bucket per request.
# `total` counts every successfully parsed row across all hosts.
host_re = re.compile(r'\w+[-\.]\w+')
total = 0
with open(NGINX_LOGFILE) as access_log:
    for line in access_log:
        sf = line.split()
        if len(sf) < 11:
            sys.stderr.write("Wrong log row: %s\n" % line)
            continue

        # The status code sits at field 7 when field 8 is '-', else at field 8;
        # the two timing fields follow it.
        i = 7 if sf[8] == '-' else 8
        try:
            code = int(sf[i])
        except ValueError:
            # Non-numeric status: treat like any other malformed row instead
            # of crashing the report.
            sys.stderr.write("Wrong log row: %s\n" % line)
            continue
        upstream_response_time = _parse_seconds(sf[i + 1])
        request_time = _parse_seconds(sf[i + 2])

        total += 1
        m = host_re.match(sf[2])
        host = m.group(0) if m else '-'

        counters = nginx[host]
        counters['total'] += 1
        counters[_status_bucket(code)] += 1
        counters[_time_bucket(request_time, upstream_response_time)] += 1
# Human-readable report labels keyed by counter name.  The answer-time
# bucket keys and the per-stage keys are looked up when the HTML tables
# are rendered.
d = dict([
    # nginx answer-time buckets
    ('0x003', '0-0.03'),
    ('003x01', '0.03-0.1'),
    ('01x05', '0.1-0.5'),
    ('05x1', '0.5-1'),
    ('1xinf', '1-inf'),
    # spam verdict counters
    ('spam', 'spam (count)'),
    ('ham', 'ham (count)'),
    # msearchproxy per-stage counters
    ('FULLCHK', 'full_chk (ms)'),
    ('MNGGET', 'mongo_get (ms)'),
    ('MNGADD', 'mongo_add (ms)'),
    ('LIPGET', 'long_ip_base_get (ms)'),
    ('LIPADD', 'long_ip_base_add (ms)'),
    ('GEO', 'georbl (ms)'),
    ('SPST', 'spamstat (ms)'),
    ('RSLV', 'resolv (ms)'),
    ('TOTAL', 'count'),
])
def _pct(part, whole):
    """Return `part` as a percentage of `whole`; 0.0 when `whole` is not positive.

    Multiplying by 100.0 first forces float arithmetic: under Python 2
    ``int / int`` truncates, which used to turn every share into 0 or 100.
    """
    return 100.0 * part / whole if whole > 0 else 0.0


def _count_cell(count, whole):
    """Render a table cell of the form "<count> / <share> %"."""
    return "<td align='center'>%d / %.2f %%</td>" % (count, _pct(count, whole))


def _nginx_table(title, rows, hosts, per_host, grand_total):
    """Render one per-host nginx table as an HTML string.

    title       -- caption placed in the spanning header cell
    rows        -- sequence of (counter key, row label) pairs
    hosts       -- host names, one column each, in display order
    per_host    -- mapping host -> counter key -> count
    grand_total -- number of parsed requests over all hosts
    """
    host_headers = ''.join('<th><b>%s</b></th>' % h for h in hosts)
    parts = ["""<table border="1" width="100%" height="100%" cellspacing="0" cellpadding="4">
    <thead>
      <tr>
        <th colspan="{0}">{1}</th>
      </tr>
      <tr>
        <th>&nbsp;</th><th><b>total</b></th>{2}
      </tr>
    </thead><tbody>\n""".format(2 + len(hosts), title, host_headers)]
    for key, label in rows:
        counts = [per_host[h][key] for h in hosts]
        parts.append("<tr><td>%s (count):</td>" % label)
        # First data column aggregates all hosts, then one column per host.
        parts.append(_count_cell(sum(counts), grand_total))
        parts.extend(_count_cell(per_host[h][key], per_host[h]['total']) for h in hosts)
        parts.append('</tr>\n')
    parts.append('</tbody></table>\n<p>&nbsp;</p>\n')
    return ''.join(parts)


# Build the HTML body of the report in `txt`: page header, nginx status-code
# table, nginx answer-time table and the msearchproxy summary table.
# Hosts are sorted so the column order is the same in every report.
hosts = sorted(nginx)
html = ["""
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <title>Complaints</title>
  </head>
<body>
  <h2>Statistics for date: {0} (<a href="https://so-web.n.yandex-team.ru/plotnik/dashboard?name=mailsearch&starttime={0}+00%3A00&endtime={0}+23%3A59">Plotnik</a>)</h2>
  """.format(DATE)]

code_keys = ['1xx', '2xx', '3xx', '4xx', '400', '404', '499', '5xx']
html.append(_nginx_table('nginx codes', [(c, c) for c in code_keys],
                         hosts, nginx, total))

time_keys = ['0x003', '003x01', '01x05', '05x1', '1xinf']
html.append(_nginx_table('nginx answer time', [(c, d[c]) for c in time_keys],
                         hosts, nginx, total))

# The header section is now closed with </thead>; it used to open a second
# <thead> instead.
html.append("""<table border="1" width="100%" height="100%" cellspacing="0" cellpadding="4">
    <thead>
      <tr>
        <th colspan="2">msearchproxy</th>
      </tr>
      <tr>
        <th>&nbsp;</th><th><b>total</b></th>
      </tr>
    </thead><tbody>\n""")
html.append("<tr><td>spam (count):</td><td align='center'>%.2f %%</td></tr>\n"
            % _pct(stat['spam'], stat['spam_total']))
html.append("<tr><td>ham (count):</td><td align='center'>%.2f %%</td></tr>\n"
            % _pct(stat['ham'], stat['spam_total']))
for c in ['MNGGET', 'MNGADD', 'LIPGET', 'LIPADD', 'FULLCHK', 'GEO', 'SPST', 'RSLV']:
    # Average per timing record; float() avoids Python 2 integer truncation.
    average = float(stat[c]) / stat['TOTAL'] if stat['TOTAL'] > 0 else 0.0
    html.append("<tr><td>%s:</td><td align='center'>%.3f</td></tr>\n" % (d[c], average))
html.append('</tbody></table></body></html>')

txt = ''.join(html)

# Wrap the HTML report into a mail message and hand it to the project's
# sendEmail helper.  Header insertion order is kept as before:
# Content-Type, (headers added by set_payload), From, To, Subject.
msg = email.message.Message()
msg.add_header('Content-Type', 'text/html')
msg.set_payload(txt, 'utf-8')
for header_name, header_value in (
        ('From', CFG['robot']),
        ('To', CFG['report']),
        ('Subject', "[%s] MSearch-proxy: Daily report" % DATE)):
    msg[header_name] = header_value
sendEmail(msg.as_string())
