#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import json
import argparse
import datetime
from collections import defaultdict
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    apply_replacements,
    date_range,
    get_cluster,
    get_driver,
    get_stat_headers,
    get_dates_from_stat,
    get_date,
    StatPusher,
    YqlRunner,
)


# Roots passed to yt.search() in main() when collecting the chats and
# chat-members tables whose paths are substituted into the report query.
CHATS_ROOT = "//home/mssngr/private/current_state_data/chats"
USERS_ROOT = "//home/mssngr/private/current_state_data/chat_members"
# Every root main() scans for tables dated after the last reported date.
# Only dates found under ALL of these roots are considered available.
log_roots = [
    "//home/mssngr/squeeze/sent_messages",
    "//logs/xivahub-log/1d",
    "//logs/xivaserver-access-log/1d",
    CHATS_ROOT,
    USERS_ROOT,
]
# Title handed to YqlRunner when process_dates() submits the query.
TITLE = "[MMA-4697] Daily Party Video Stats | YQL"


def tables_list_filter(tables_list):
    """Keep a single table per date and return the survivors sorted.

    Tables are keyed by ``get_date(table)``; when several tables share a
    key, the greatest one under normal comparison is kept.
    """
    latest_by_date = {}
    for path in tables_list:
        day = get_date(path)
        # Strict ">" keeps the first of several equal candidates, as max() does.
        if day not in latest_by_date or path > latest_by_date[day]:
            latest_by_date[day] = path
    return sorted(latest_by_date.values())


def process_dates(dates, cluster, args, chats_tables, users_tables):
    """Fill in the report SQL stub and run it through YqlRunner.

    Args:
        dates: non-empty sequence; ``dates[0]`` and ``dates[-1]`` are
            stringified into the ``@[date_from]`` / ``@[date_to]`` placeholders.
        cluster: not used by this function; kept so existing callers
            keep working.
        args: mapping providing the ``"pool"``, ``"root"`` and ``"report"``
            values.
        chats_tables: JSON-serializable list dumped into
            ``@[chats_tables_list]``.
        users_tables: JSON-serializable list dumped into
            ``@[users_tables_list]``.

    Raises:
        KeyError: if ``YT_PROXY`` or ``YQL_TOKEN`` is missing from the
            environment, or *args* lacks one of the keys above.
    """
    proxy = os.environ["YT_PROXY"]
    yc = YqlClient(db=proxy.lower(), token=os.environ["YQL_TOKEN"])
    yr = YqlRunner(client=yc, title=TITLE)

    # The stub is opened relative to the current working directory.
    with open("party_daily_stats_report_stub.sql", "rb") as f:
        query = f.read().decode("utf8")

    # Each placeholder appears exactly once; the original literal listed
    # "@[date_to]" twice, which Python collapses to a single entry anyway.
    replacements = {
        "@[pool]": args["pool"],
        "@[output_root]": args["root"],
        "@[date_from]": str(dates[0]),
        "@[date_to]": str(dates[-1]),
        "@[report]": args["report"],
        "@[chats_tables_list]": json.dumps(chats_tables, indent=4),
        "@[users_tables_list]": json.dumps(users_tables, indent=4),
    }
    query = apply_replacements(query, replacements)

    # NOTE(review): wait=True presumably blocks until the query finishes -
    # confirm against YqlRunner.
    yr.run(query, wait=True)


def parse_datetime(s):
    """Parse ``YYYY-MM-DD HH:MM:SS`` (or the same with a ``T`` separator).

    Every ``T`` in *s* is replaced by a space before parsing, and the
    result is a naive ``datetime.datetime``.
    """
    layout = "%Y-%m-%d %H:%M:%S"
    normalized = s.replace("T", " ")
    return datetime.datetime.strptime(normalized, layout)


def _has_date_in_range(path, date_from, date_to):
    """Tell whether get_date(path) is truthy and within [date_from, date_to]."""
    table_date = get_date(path)
    return (
        bool(table_date)
        and table_date >= date_from
        and table_date <= date_to
    )


def _search_tables_in_range(yt, root, date_from, date_to):
    """Search *root* for tables dated within the range, one table per date."""
    found = yt.search(
        root=root,
        node_type="table",
        path_filter=lambda path: _has_date_in_range(path, date_from, date_to),
    )
    return tables_list_filter(sorted(found))


def _check_tables_cover_range(root, tables, date_from, date_to):
    """Raise unless *tables* is non-empty and spans date_from..date_to."""
    if not tables:
        # Without this guard min() below fails with the opaque
        # "min() arg is an empty sequence"; the exception type is unchanged.
        raise ValueError(
            "no tables found under {} between {} and {}".format(
                root, date_from, date_to
            )
        )
    min_table = min(tables)
    max_table = max(tables)
    if get_date(min_table) > date_from:
        raise Exception(
            "oldest table {} doesn't fit into specified from date".format(
                min_table
            )
        )
    if get_date(max_table) < date_to:
        raise Exception(
            "youngest table {} doesn't fit into specified to date".format(
                max_table
            )
        )


def _discover_fresh_tables(yt, report):
    """Find dates newer than the last reported one that every log root has.

    Returns a ``(dates, chats_tables, users_tables)`` tuple. ``dates`` is
    ``[earliest, latest]`` of the commonly available dates, or ``[]`` when
    there is none (both table lists are then empty as well). Only the two
    bounds are returned, even if the available dates have gaps in between.
    """
    dates_from_stat = get_dates_from_stat(
        report=report, headers=get_stat_headers(), scale="d"
    )
    last_date = dates_from_stat[-1]
    print("last date: {}".format(last_date))

    def is_fresh(path):
        table_date = get_date(path)
        return bool(table_date) and table_date > last_date

    availables = None
    tables_dict = {}
    for log_root in log_roots:
        tables = list(
            yt.search(
                root=log_root,
                node_type=["table", "link"],
                path_filter=is_fresh,
            )
        )
        print(
            "{} fresh dates: {}".format(
                log_root, ",".join(str(get_date(x)) for x in tables)
            )
        )
        tables_dict[log_root] = tables
        fresh_dates = {get_date(x) for x in tables}
        # Intersect across roots: a date counts only if every root has it.
        if availables is None:
            availables = fresh_dates
        else:
            availables &= fresh_dates

    availables = sorted(availables)
    if not availables:
        return [], [], []

    date_from = availables[0]
    date_to = availables[-1]
    print("last available date: {}".format(date_to))
    chats_tables = tables_list_filter(
        sorted(
            x
            for x in tables_dict[CHATS_ROOT]
            if _has_date_in_range(x, date_from, date_to)
        )
    )
    users_tables = tables_list_filter(
        sorted(
            x
            for x in tables_dict[USERS_ROOT]
            if _has_date_in_range(x, date_from, date_to)
        )
    )
    return [date_from, date_to], chats_tables, users_tables


def main():
    """Command-line entry point: pick the dates to process and run the report.

    With both ``--from`` and ``--to`` the given range is used, and the
    chats / chat-members tables found for it must reach both ends of the
    range. Otherwise the range is derived from tables that are newer than
    the last date already present in the stat report and available under
    every root in ``log_roots``.

    Raises:
        ValueError: an explicit range matched no chats or users tables.
        Exception: the tables found do not reach one end of the range.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pool", default="robot-mma-nirvana")
    parser.add_argument(
        "--report", default="Video/Others/Strm/party_daily_stats"
    )
    parser.add_argument(
        "--root",
        default="//home/videoquality/vh_analytics/party_daily_stats_report",
    )
    parser.add_argument("--from")
    # --redo is accepted but not read anywhere in this function.
    parser.add_argument("--redo", action="store_true")
    parser.add_argument("--to")
    args = vars(parser.parse_args())

    from_ = args.get("from")
    to_ = args.get("to")

    cluster = get_cluster(clusters, args)
    yt = get_driver(cluster).client
    chats_tables = []
    users_tables = []

    # Supplying only one of --from / --to falls through to auto-discovery.
    if from_ and to_:
        dates = sorted(date_range(from_, to_))
    else:
        dates, chats_tables, users_tables = _discover_fresh_tables(
            yt, args["report"]
        )

    if not dates:
        print("no dates to process")
        return

    if not chats_tables:
        # Explicit range: look the tables up and make sure they cover it.
        date_from = dates[0]
        date_to = dates[-1]
        chats_tables = _search_tables_in_range(
            yt, CHATS_ROOT, date_from, date_to
        )
        users_tables = _search_tables_in_range(
            yt, USERS_ROOT, date_from, date_to
        )
        for root, tables in (
            (CHATS_ROOT, chats_tables),
            (USERS_ROOT, users_tables),
        ):
            _check_tables_cover_range(root, tables, date_from, date_to)

    print("processing {}".format(dates))
    process_dates(dates, cluster, args, chats_tables, users_tables)


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
