#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import argparse
import datetime
import subprocess
import codecs
import json
from collections import defaultdict
from yt import yson
from yt.yson.yson_types import YsonEntity
import yt.wrapper as yt
from videolog_common import get_date


# Default YT directory searched for dated tables; used as the default of the
# --root command-line option.
ROOT = "//home/videoquality/vh_analytics/mma-4376-prod/json_data"


class FixedOffset(datetime.tzinfo):
    """Time zone with a constant offset from UTC and no daylight saving time.

    Args:
        offset: Offset from UTC in minutes; positive values are ahead of UTC.
            Defaults to 0 so the class can be instantiated without arguments,
            which the ``tzinfo`` pickling/copying protocol relies on.
    """

    def __init__(self, offset=0):
        self.__offset = datetime.timedelta(minutes=offset)

    def utcoffset(self, dt=None):
        """Return the fixed offset from UTC as a ``timedelta``."""
        return self.__offset

    def dst(self, dt=None):
        """Return a zero ``timedelta``: this zone never applies DST."""
        return datetime.timedelta(0)

    def tzname(self, dt=None):
        """Return a label for the offset, e.g. ``"UTC+03:00"``.

        Without this override the base class raises ``NotImplementedError``
        whenever the name is requested (``strftime("%Z")``, ``dt.tzname()``).
        """
        total_minutes = int(self.__offset.total_seconds() // 60)
        sign = "-" if total_minutes < 0 else "+"
        hours, minutes = divmod(abs(total_minutes), 60)
        return "UTC{}{:02d}:{:02d}".format(sign, hours, minutes)


# Fixed-offset zones used by parse_dt: UTC itself and UTC+3 hours
# (named "msk", presumably Moscow time).
utc = FixedOffset(0)
msk = FixedOffset(60 * 3)


def parse_dt(s):
    """Return the ``msk`` calendar date of a UTC timestamp string.

    Everything from the first "." onwards (e.g. fractional seconds) is
    discarded; the remainder must match ``%Y-%m-%dT%H:%M:%S``. The parsed
    value is treated as UTC and converted to the ``msk`` zone before its
    date part is returned.
    """
    without_fraction = s.split(".")[0]
    naive = datetime.datetime.strptime(without_fraction, "%Y-%m-%dT%H:%M:%S")
    in_msk = naive.replace(tzinfo=utc).astimezone(msk)
    return in_msk.date()


def get_tup(rec):
    """Return the ``(region_id, fielddate, is_mobile)`` tuple of a record."""
    region_id = rec["region_id"]
    fielddate = rec["fielddate"]
    is_mobile = rec["is_mobile"]
    return region_id, fielddate, is_mobile


def _get_dates_set(yt, root, func=None):
    """Collect the distinct dates of the tables found under ``root``.

    Args:
        yt: Object exposing ``search(root=..., node_type=..., path_filter=...)``.
        root: Path under which tables are searched.
        func: Optional predicate passed to ``search`` as ``path_filter``. By
            default a path is kept when ``get_date`` returns a truthy value
            for it.

    Returns:
        Set of ``get_date(path)`` values for every table yielded by the search.
    """
    path_filter = func
    if path_filter is None:
        def path_filter(path):
            return bool(get_date(path))

    table_paths = yt.search(
        root=root, node_type="table", path_filter=path_filter
    )
    return set(get_date(path) for path in table_paths)


def convert_defdict(dct):
    """Recursively rebuild dict-like values (e.g. ``defaultdict``) as plain dicts.

    Non-dict values are returned unchanged; only dict keys and values are
    descended into (lists and other containers are left as they are).
    """
    if not isinstance(dct, dict):
        return dct
    plain = {}
    for key, value in dct.items():
        plain[convert_defdict(key)] = convert_defdict(value)
    return plain


def generate_data(recs, args):
    """Group per-operator measurements by platform, region and day.

    Args:
        recs: Iterable of mapping-like records providing the keys
            ``region_id``, ``is_mobile``, ``fielddate`` (``YYYY-MM-DD``),
            ``isp``, ``requests``, ``users`` and the metric column named by
            ``args["field"]``.
        args: Mapping with the key ``"field"`` naming the metric column.

    Returns:
        Tuple ``(desktop, mobile)``. Each element is a dict with an empty
        ``"geo_map"`` dict and a ``"history"`` dict shaped as
        ``{region_id: {timestamp_ms: [entry, ...]}}`` where both keys are
        strings and each entry has ``operator``, ``value``, ``points`` and
        ``users``. Records whose metric is missing are reported on stderr
        and skipped.
    """
    def empty_result():
        return {
            "geo_map": {},
            "history": defaultdict(lambda: defaultdict(list)),
        }

    desktop = empty_result()
    mobile = empty_result()

    epoch = datetime.datetime(1970, 1, 1)
    shift_seconds = 10800  # three hours, same correction as before
    ts_by_fielddate = {}  # each distinct date string is parsed only once

    for rec in recs:
        region_id = str(rec["region_id"])
        result = mobile if rec["is_mobile"] else desktop

        fielddate = rec["fielddate"]
        ts = ts_by_fielddate.get(fielddate)
        if ts is None:
            # Seconds since the epoch are computed from UTC midnight of the
            # date. The previous ``strftime("%s")`` is a non-portable libc
            # extension that uses the host's local time zone; on a host
            # running in UTC this produces exactly the same number.
            # NOTE(review): confirm the job is not deployed with a non-UTC TZ.
            midnight = datetime.datetime.strptime(fielddate, "%Y-%m-%d")
            seconds = int((midnight - epoch).total_seconds())
            ts = (seconds - shift_seconds) * 1000
            ts_by_fielddate[fielddate] = ts

        value = rec[args["field"]]
        if value is None or isinstance(value, YsonEntity):
            sys.stderr.write("error on: " + str(rec) + "\n")
            continue
        # Values are floored at 0.1 and kept to one decimal place.
        if value < 0.1:
            value = 0.1
        value = round(value, 1)

        result["history"][region_id][str(ts)].append(
            {
                "operator": rec["isp"],
                "value": value,
                "points": rec["requests"],
                "users": rec["users"],
            }
        )

    # Replace the nested defaultdicts with plain dicts before returning.
    desktop["history"] = convert_defdict(desktop["history"])
    mobile["history"] = convert_defdict(mobile["history"])
    return desktop, mobile


def main():
    """Regenerate the JSON archive when a newer dated table is available.

    Reads two timestamps from the ``--last-dates`` file and takes the earlier
    one minus a day as the date the database currently has data for. If the
    newest dated table under ``--root`` is later than that (or ``--force`` is
    given), the table is converted to ``desktop.json`` and ``mobile.json``,
    both are packed with the ``zip`` executable and the archive is moved to
    ``--output``. Otherwise the script exits with status 0 without writing
    anything.

    Raises:
        subprocess.CalledProcessError: if ``zip`` or ``mv`` exits non-zero.
    """
    parser = argparse.ArgumentParser()
    # The file is opened unconditionally below, so the option is mandatory.
    parser.add_argument("--last-dates", "-l", required=True)
    parser.add_argument("--output", "-o")
    parser.add_argument("--force", action="store_true")
    parser.add_argument("--root", "-r", default=ROOT)
    parser.add_argument("--field", "-f", default="score_30_median_median")
    args = vars(parser.parse_args())

    with open(args["last_dates"]) as f:
        lines = f.read().strip().split("\n")
    if len(lines) < 2:
        sys.exit(
            "expected two timestamps in {}, found {} line(s)".format(
                args["last_dates"], len(lines)
            )
        )
    dates = [parse_dt(lines[0]), parse_dt(lines[1])]

    # The file holds the dates the database was updated, not the dates the
    # data is for, hence the one-day shift.
    last_date = min(dates) - datetime.timedelta(days=1)
    print("db last updated with data for: {}".format(last_date))

    # These, in contrast, are the dates the data is for.
    available_dates = _get_dates_set(yt, args["root"])
    if not available_dates:
        sys.exit("no dated tables found under {}".format(args["root"]))

    last_available_date = max(available_dates)
    print("last available date: {}".format(last_available_date))

    if not (last_available_date > last_date or args["force"]):
        print("no need to do anything")
        sys.exit(0)

    # Fail before doing the expensive work rather than at the final move.
    if args["output"] is None:
        parser.error("--output/-o is required when data has to be generated")

    table = "{}/{}".format(args["root"], last_available_date)
    print("generating data...")
    desktop, mobile = generate_data(yt.read_table(table), args)

    with codecs.open("desktop.json", "w") as f:
        json.dump(desktop, f, indent=4, ensure_ascii=False, sort_keys=True)
    with codecs.open("mobile.json", "w") as f:
        json.dump(mobile, f, indent=4, ensure_ascii=False, sort_keys=True)

    # check_call raises on a non-zero exit status, so a failed zip or mv is
    # no longer reported as a successful run.
    subprocess.check_call(["zip", "output.zip", "desktop.json", "mobile.json"])
    subprocess.check_call(["mv", "output.zip", args["output"]])


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
