import re
from collections import defaultdict
from mail.monitoring.common.timetail import timetail

# Upper bounds of the latency histogram buckets; the collectors below compare
# them against durations expressed in milliseconds.
TIMING_BUCKETS = (
    50, 100, 200, 300, 500,
    1000, 2000, 3000, 5000,
    10000, 20000,
)

# Default log files read by the collectors below.
NSLS_LOG_PATH = "/var/log/notsolitesrv/notsolitesrv.tskv"
HTTP_CLIENT_LOG_PATH = "/var/log/notsolitesrv/http_client.tskv"

# Format name handed to the log reader as its ``fmt`` argument.
TIMETAIL_FMT = "tskv"

# Service tags recognised as the last "-"-separated part of ``y_context``.
SERVICES = (
    "BB",
    "MG",
    "BWLIST",
    "FIRSTLINE",
    "FURITA",
    "MDBSAVE",
    "MSEARCH",
    "MSETTINGS",
    "TUPITA",
)


def _get_http_client_stat(services, log, timefmt, time, timing_buckets, get_logs=timetail, raw_timings=False):
    """Aggregate HTTP client log records into per-service statistics.

    Records are obtained from ``get_logs(log, fmt=timefmt, time=time)``; each
    record is used as a mapping of field name to string value. The service
    tag is the last "-"-separated part of the ``y_context`` field. Records
    that lack ``y_context``, ``status`` or ``total_time``, that belong to a
    service not listed in ``services``, or whose ``status``/``total_time``
    cannot be parsed as numbers are skipped.

    :param services: container of service tags to collect statistics for.
    :param log: forwarded to ``get_logs`` as its first positional argument.
    :param timefmt: forwarded to ``get_logs`` as ``fmt``.
    :param time: forwarded to ``get_logs`` as ``time``.
    :param timing_buckets: iterable of bucket upper bounds in milliseconds;
        an extra ``+inf`` bucket is always appended.
    :param get_logs: callable returning an iterable of records.
    :param raw_timings: when true, every ``total_time`` value is additionally
        stored (as a float, unscaled) in the ``"timings"`` list.
    :return: ``defaultdict`` keyed by service tag. Each value is a dict with
        ``"http_codes"`` (``defaultdict`` of int status -> count),
        ``"timing_buckets"`` (dict of upper bound -> count of requests whose
        duration is below that bound but not below any smaller one) and
        ``"timings"`` (list, filled only when ``raw_timings`` is true).
    """
    # Accept any iterable of bounds (tuple, list, generator, ...). The
    # trailing +inf bound guarantees that every duration lands in a bucket.
    bounds = sorted(list(timing_buckets) + [float("inf")])

    def new_service_entry():
        return {
            "http_codes": defaultdict(int),
            "timing_buckets": dict((bound, 0) for bound in bounds),
            "timings": [],
        }

    info = defaultdict(new_service_entry)
    required_fields = ("y_context", "status", "total_time")

    # Sample record:
    # tskv	tskv_format=mail-http_client-format-log	timestamp=2021-09-23T19:11:40.323442+03:00
    # session_id=0x7f419c0c43a0	y_context=bhufB2lAKh-pxfe19YQ-FIRSTLINE	host=firstline.mail.yandex.net
    # port=443	uri=firstline.mail.yandex.net:443/1/firstline?service=nsls&session_id=bhufB2lAKh-pxfe19YQ
    # status=200	resolve_time=0.000	connect_time=0.000	tls_time=0.000	total_time=0.018	error=success
    # bytes_out=85335	bytes_in=232	attempt=0
    for params in get_logs(log, fmt=timefmt, time=time):
        if not all(field in params for field in required_fields):
            continue
        srv = params["y_context"].split("-")[-1]
        if srv not in services:
            continue

        # Parse everything before touching the counters, so that a malformed
        # record is skipped as a whole, the same way a record with missing
        # fields is. int() of NaN/inf raises ValueError/OverflowError.
        try:
            status = int(params["status"])
            total_time = float(params["total_time"])
            duration_ms = int(total_time * 1000)
        except (ValueError, OverflowError):
            continue

        stat = info[srv]
        stat["http_codes"][status] += 1
        for bound in bounds:
            if duration_ms < bound:
                stat["timing_buckets"][bound] += 1
                break
        if raw_timings:
            stat["timings"].append(total_time)
    return info


def get_http_stat(service, log=HTTP_CLIENT_LOG_PATH, timefmt=TIMETAIL_FMT, time=60, timing_buckets=TIMING_BUCKETS, get_logs=timetail,
                  raw_timings=False):
    """Return HTTP client statistics for a single service.

    All arguments except ``service`` are forwarded unchanged to
    ``_get_http_client_stat``.

    :param service: service tag; must be one of ``SERVICES``.
    :return: the entry for ``service`` from the mapping returned by
        ``_get_http_client_stat``.
    :raises ValueError: if ``service`` is not listed in ``SERVICES``.
    """
    if service not in SERVICES:
        # %-formatting instead of "+" so that a non-string argument (e.g. None)
        # still produces the intended ValueError rather than a TypeError from
        # string concatenation. The message is unchanged for string inputs.
        raise ValueError("Unknown service: %s" % (service,))
    stats = _get_http_client_stat([service], log, timefmt, time, timing_buckets, get_logs=get_logs, raw_timings=raw_timings)
    return stats[service]


def get_all_http_client_stat(log=HTTP_CLIENT_LOG_PATH, timefmt=TIMETAIL_FMT, time=60, timing_buckets=TIMING_BUCKETS, get_logs=timetail,
                             raw_timings=False):
    """Collect HTTP client statistics for every service listed in ``SERVICES``.

    Thin wrapper: all arguments are forwarded unchanged to
    ``_get_http_client_stat`` and its result is returned as is.
    """
    options = {"get_logs": get_logs, "raw_timings": raw_timings}
    return _get_http_client_stat(SERVICES, log, timefmt, time, timing_buckets, **options)


def get_reply_codes(log=NSLS_LOG_PATH, timefmt=TIMETAIL_FMT, time=60, get_logs=timetail):
    """Count the reply codes sent in ``send DATA_END response`` log messages.

    Records are obtained from ``get_logs(log, fmt=timefmt, time=time)``;
    records without a ``message`` field, or whose message does not contain a
    DATA_END response with an enhanced status code, are ignored.

    :param log: forwarded to ``get_logs`` as its first positional argument.
    :param timefmt: forwarded to ``get_logs`` as ``fmt``.
    :param time: forwarded to ``get_logs`` as ``time``.
    :param get_logs: callable returning an iterable of records.
    :return: ``defaultdict(int)`` mapping the three-digit reply code (kept as
        a string, e.g. ``"554"``) to the number of occurrences.
    """
    # Sample record:
    # [2020-May-19 10:13:20.710335] tskv	tskv_format=mail-notsolitesrv-tskv-log	thread=139873637877504
    # unixtime=1589872400	timestamp=2020-05-19T10:13:20.710230+0300	level=debug	uniq_id=dwHkopE4dD-lhkPrzqZ
    # connection_id=dwHkopE4dD	envelope_id=lhkPrzqZ	message=send DATA_END response: 554 5.1.1 Unknown user; dwHkopE4dD-lhkPrzqZ\r\n
    #
    # The enhanced status code has the form class.subject.detail, where
    # subject and detail may each be one to three digits (RFC 3463), so they
    # must not be restricted to a single digit (e.g. "550 5.7.26 ...").
    answer_re = re.compile(r"send DATA_END response: (?P<code>\d{3}) \d\.\d{1,3}\.\d{1,3} ")

    codes = defaultdict(int)
    for params in get_logs(log, fmt=timefmt, time=time):
        if "message" not in params:
            continue
        match = answer_re.search(params["message"])
        if match:
            codes[match.group("code")] += 1
    return codes


def get_msg_delay_stat(types, log=NSLS_LOG_PATH, timefmt=TIMETAIL_FMT, time=60, timing_buckets=TIMING_BUCKETS, get_logs=timetail, raw_timings=False):
    """Aggregate the per-stage delays reported in ``msg_delay`` log messages.

    Records are obtained from ``get_logs(log, fmt=timefmt, time=time)``.
    Only records whose ``message`` field matches the ``msg_delay`` pattern
    and whose ``imap`` flag is not ``"true"`` are used. The rest of the
    message is read as space-separated ``name=value`` pairs; pairs whose name
    is not in ``types`` or whose value is not a number are ignored.

    :param types: container of delay names to collect (e.g. ``"in"``,
        ``"out"``, ``"dlv"``).
    :param log: forwarded to ``get_logs`` as its first positional argument.
    :param timefmt: forwarded to ``get_logs`` as ``fmt``.
    :param time: forwarded to ``get_logs`` as ``time``.
    :param timing_buckets: iterable of bucket upper bounds in milliseconds;
        an extra ``+inf`` bucket is always appended.
    :param get_logs: callable returning an iterable of records.
    :param raw_timings: selects the shape of the result, see below.
    :return: ``defaultdict`` keyed by delay name. With ``raw_timings`` false
        each value is a dict of upper bound -> count of delays that are
        below that bound but not below any smaller one (logged values are
        multiplied by 1000 before comparison). With ``raw_timings`` true
        each value is a list of the absolute logged values, unscaled.
    """
    # Accept any iterable of bounds (tuple, list, generator, ...). The
    # trailing +inf bound guarantees that every delay lands in a bucket.
    bounds = sorted(list(timing_buckets) + [float("inf")])
    if raw_timings:
        timings = defaultdict(list)
    else:
        timings = defaultdict(lambda: dict((bound, 0) for bound in bounds))

    # Sample record:
    # tskv	tskv_format=mail-notsolitesrv-tskv-log	thread=139920941233920	unixtime=1632413638
    # timestamp=2021-09-23T19:13:58.259461+0300	level=notice	uniq_id=v0dycPdNMB-2E10512BF7DC
    # connection_id=v0dycPdNMB	envelope_id=2E10512BF7DC	message=msg_delay email=ropotan@yandex.ru
    # mulcaid=320.mail:20501518.E5363812:1851512008423613638698747955 imap=false in=0.410 out=0.609 dlv=0.199
    # senders=0.000 uj=0.000 mulca=0.016 resolve=0.024 preprocess=0.000 meta_save_op=0.111 parse=0.002
    # where_name=DLV
    msgdelay_re = re.compile(r"msg_delay email=[^ ]+ mulcaid=[^ ]+ imap=(?P<imap_tag>[^ ]+) (?P<delay_str>.+)$")

    for params in get_logs(log, fmt=timefmt, time=time):
        if "message" not in params:
            continue
        match = msgdelay_re.search(params["message"])
        if not match or match.group("imap_tag") == "true":
            continue

        # Filter by name before converting, so that an unrelated or malformed
        # token cannot abort the whole run. A dict is kept so that a name
        # repeated within one message is counted once (last value wins).
        delays = {}
        for token in match.group("delay_str").split():
            parts = token.split("=")
            if len(parts) < 2 or parts[0] not in types:
                continue
            try:
                delays[parts[0]] = float(parts[1])
            except ValueError:
                continue

        # items() works on both Python 2 and Python 3 (iteritems() is 2-only).
        for delay_name, duration in delays.items():
            if raw_timings:
                # Some timings can be negative; store their magnitude.
                timings[delay_name].append(abs(duration))
                continue

            # NOTE(review): unlike the raw mode above, negative durations are
            # not made absolute here and therefore fall into the smallest
            # bucket - confirm that this asymmetry is intended.
            duration_ms = duration * 1000
            for bound in bounds:
                if duration_ms < bound:
                    timings[delay_name][bound] += 1
                    break
    return timings
