#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import argparse
import datetime
import subprocess
import codecs
import json
from collections import defaultdict
from boto.s3.connection import S3Connection
from yt import yson
from yt.yson.yson_types import YsonEntity
import yt.wrapper as yt
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    get_date,
    YqlRunner,
    get_cluster,
    apply_replacements,
)


# Title given to the YQL run (passed as ``title`` to YqlRunner in main()).
TITLE = "[MMA-4376] Internet Index Research Update | YQL"
# Default value of the --root option: the YT directory searched for dated
# research tables.
ROOT = "//home/videoquality/vh_analytics/mma-4376-prod/research"


class FixedOffset(datetime.tzinfo):
    """Time zone with a constant offset from UTC and no daylight saving.

    :param offset: offset from UTC in minutes (positive is east of UTC).
    """

    def __init__(self, offset):
        self.__offset = datetime.timedelta(minutes=offset)

    def utcoffset(self, dt=None):
        """Return the fixed offset from UTC as a ``timedelta``."""
        return self.__offset

    def dst(self, dt=None):
        """Return a zero ``timedelta``: the zone never observes DST."""
        return datetime.timedelta(0)

    def tzname(self, dt=None):
        """Return the offset formatted as ``+HH:MM`` / ``-HH:MM``.

        The base ``tzinfo.tzname`` raises ``NotImplementedError``, which
        breaks ``datetime.tzname()`` and ``strftime("%Z")``.
        """
        total_minutes = int(self.__offset.total_seconds() // 60)
        sign = "-" if total_minutes < 0 else "+"
        hours, minutes = divmod(abs(total_minutes), 60)
        return "{}{:02d}:{:02d}".format(sign, hours, minutes)


# Shared tzinfo instances; FixedOffset takes its offset in minutes.
utc = FixedOffset(offset=0)
msk = FixedOffset(offset=3 * 60)  # UTC+3


def _get_dates_set(root, func=None):
    """Collect ``get_date(path)`` for every table found under ``root``.

    :param root: YT path to search.
    :param func: optional predicate applied to each table path; when
        omitted, only paths for which ``get_date`` returns a truthy value
        are kept.
    :return: set of ``get_date`` results for the matching table paths.
    """
    if func is None:

        def func(path):
            return bool(get_date(path))

    table_paths = yt.search(root=root, node_type="table", path_filter=func)
    return set(get_date(path) for path in table_paths)


def parse_dt(s):
    """Return the calendar date, in the ``msk`` zone, of a UTC timestamp.

    :param s: timestamp formatted as ``YYYY-MM-DDTHH:MM:SS``; anything
        from the first ``.`` onwards (e.g. fractional seconds) is ignored.
    :return: ``datetime.date`` after shifting the moment from ``utc`` to
        ``msk``.
    """
    # Keep only the part before the first dot.
    timestamp, _, _ = s.partition(".")
    moment_utc = datetime.datetime.strptime(
        timestamp, "%Y-%m-%dT%H:%M:%S"
    ).replace(tzinfo=utc)
    moment_msk = moment_utc.astimezone(msk)
    return moment_msk.date()


def get_last_date(bucket):
    """Return the date currently published in the bucket's config file.

    Reads ``2020/internet-rating/config.json`` from ``bucket``, parses it
    as JSON and passes its ``update_date`` field through ``get_date``.
    """
    raw_config = bucket.get_key(
        "2020/internet-rating/config.json"
    ).get_contents_as_string()
    published = json.loads(raw_config)
    return get_date(published["update_date"])


def uformat(x):
    """Convert any value to a text (unicode) string.

    Byte strings are decoded as UTF-8 with undecodable bytes replaced;
    everything else is converted with the text type's constructor.

    Works on both Python 2 (``str`` is ``bytes``, text is ``unicode``) and
    Python 3 (where the ``unicode`` builtin and ``str.decode`` no longer
    exist).
    """
    if isinstance(x, bytes):
        return x.decode("utf8", errors="replace")
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
    return type(u"")(x)


def tabulate(*args):
    """Format the arguments as one tab-separated, newline-terminated line.

    Each argument is converted to text with ``uformat`` first.
    """
    cells = [uformat(value) for value in args]
    line = u"\t".join(cells)
    return line + u"\n"


def read_tsv(table, filename, columns):
    """Read a YT table and write it to a local UTF-8 TSV file.

    Despite the name, this function *writes* the TSV: the first line is
    the header built from ``columns``, followed by one line per table row.

    :param table: YT path of the table to read.
    :param filename: local file to create (overwritten if it exists).
    :param columns: column names to export, in output order. The special
        name ``u"sort"`` is not read from the table; it is filled with the
        zero-based index of the row in read order.
    :raises KeyError: if a row lacks one of the requested columns.
    """
    # The context manager closes the file even when reading the table or
    # looking up a column fails midway.
    with codecs.open(filename, "w", "utf8") as f:
        f.write(tabulate(*columns))
        for i, rec in enumerate(yt.read_table(table)):
            # tabulate() already converts every cell to text.
            row = [i if col == u"sort" else rec[col] for col in columns]
            f.write(tabulate(*row))


def s3_upload(bucket, path, filename):
    """Upload the contents of a local file to ``path`` in an S3 bucket.

    :param bucket: boto bucket object (needs ``get_key`` / ``new_key``).
    :param path: key name inside the bucket.
    :param filename: local file whose bytes are uploaded.
    """
    print("uploading to {}...".format(path))
    with open(filename, "rb") as f:
        content = f.read()
    key = bucket.get_key(path)
    if key is None:
        # get_key() returns None for an object that does not exist yet;
        # create it instead of failing on the first-ever upload.
        key = bucket.new_key(path)
    key.set_contents_from_string(content)


def main():
    """Publish the newest research tables to S3 when S3 is out of date.

    Steps:

    1. Read the currently published ``update_date`` from the config file
       in the ``milab`` bucket.
    2. Find the newest dated table under ``--root`` (paths ending with
       ``_distribution`` are ignored when picking the date).
    3. If that date is newer than the published one, or ``--force`` is
       given, run the ``milab_stub.sql`` YQL query, dump the resulting
       tables to local TSV files and upload them. The config file with the
       new date is uploaded last.

    Otherwise the process exits with status 0 without doing anything.

    Reads ``ACCESS_KEY_ID`` and ``ACCESS_KEY`` from the environment, plus
    ``YT_PROXY`` and ``YQL_TOKEN`` when data has to be generated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--s3-host", default="s3.mdst.yandex.net")
    parser.add_argument(
        "--force",
        help="date (parsed with get_date) to publish even if S3 already "
        "holds the newest available data",
    )
    parser.add_argument("--pool", default="robot-mma-nirvana")
    parser.add_argument("--root", "-r", default=ROOT)
    args = vars(parser.parse_args())

    conn = S3Connection(
        host=args["s3_host"],
        aws_access_key_id=os.environ["ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["ACCESS_KEY"],
    )
    buckets = list(conn.get_all_buckets())
    bucket = [x for x in buckets if x.name == u"milab"][0]

    last_date = get_last_date(bucket)
    print("last date: {}".format(last_date))

    available_dates = _get_dates_set(
        args["root"],
        func=lambda x: bool(get_date(x)) and not x.endswith("_distribution"),
    )

    # An empty search result must not crash: --force can still name the
    # date to publish explicitly.
    last_available_date = max(available_dates) if available_dates else None
    print("last available date: {}".format(last_available_date))

    if args["force"]:
        last_available_date = get_date(args["force"])
    elif last_available_date is None or not last_available_date > last_date:
        print("no need to do anything")
        sys.exit(0)

    input_table = "{}/{}".format(args["root"], last_available_date)
    output_root = "{}_prod_tables/{}".format(
        args["root"], last_available_date
    )
    print("generating data...")
    yc = YqlClient(db=os.environ["YT_PROXY"], token=os.environ["YQL_TOKEN"])
    yr = YqlRunner(yc, title=TITLE)
    with codecs.open("milab_stub.sql", "r", "utf8") as f:
        query = f.read()
    query = apply_replacements(
        query,
        {
            "@[pool]": args["pool"],
            "@[input_table]": input_table,
            "@[output_root]": output_root,
        },
    )
    yr.run(query)

    distribution_table = "{}/{}_distribution".format(
        args["root"], last_available_date
    )
    map_columns = [
        u"sort",
        u"name_clean",
        u"score_30",
        u"latitude",
        u"longitude",
    ]
    # (YT table, local TSV file, exported columns, S3 key)
    # NOTE: the per-provider exports (prov_desktop / prov_mobile tables,
    # columns name_clean, sort, score_30, isp, uploaded to
    # 2020/internet-rating/providers/data/data-{desktop,mobile}.tsv) are
    # currently disabled, as are the provider_1..3 map columns.
    exports = [
        (
            distribution_table,
            "distribution.tsv",
            [u"is_mobile", u"score_round", u"users"],
            "2020/internet-rating/distribution/data/data.tsv",
        ),
        (
            "{}/map_desktop".format(output_root),
            "map_desktop.tsv",
            map_columns,
            "2020/internet-rating/map/data/data-desktop.tsv",
        ),
        (
            "{}/map_mobile".format(output_root),
            "map_mobile.tsv",
            map_columns,
            "2020/internet-rating/map/data/data-mobile.tsv",
        ),
    ]

    # Dump everything locally first so that a failed read uploads nothing.
    for yt_table, local_file, columns, _ in exports:
        read_tsv(yt_table, local_file, columns)
    with open("update_date.json", "w") as f:
        f.write('{{"update_date": "{}"}}'.format(last_available_date))

    for _, local_file, _, s3_path in exports:
        s3_upload(bucket, s3_path, local_file)
    # The config goes last: it is what get_last_date() reads on the next
    # run to decide whether the data is up to date.
    s3_upload(bucket, "2020/internet-rating/config.json", "update_date.json")


# Run the update only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
