#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import json
import argparse
import datetime
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    apply_replacements,
    date_range,
    get_cluster,
    get_driver,
    _check_non_empty,
    get_date,
    YqlRunner,
)

# YT directory that main() searches for paths ending in "sessions" to learn
# which dates have input available.
cube_root = "//cubes/video-strm"
# YT directory whose per-date nodes main() probes to find dates that are
# still missing.
ugc_root = "//home/videoquality/vh_analytics/ugc_stats"
# Title handed to YqlRunner in process_dates().
TITLE = "VH UGC Stats | YQL"
# Default value of the --root option; currently the same path as ugc_root.
ROOT = "//home/videoquality/vh_analytics/ugc_stats"


def process_dates(dates, args):
    """Render the UGC stats query for the span of ``dates`` and submit it.

    The query text is read from ``ugc_stats_regular.sql`` (relative to the
    current working directory), its ``@[...]`` placeholders are filled in
    with ``apply_replacements`` and the result is handed to ``YqlRunner.run``
    with ``wait=True``.

    :param dates: non-empty sequence; only the first and the last item are
        used (output table name, ``@[date_from]`` and ``@[date_to]``).
    :param args: dict providing the keys ``root``, ``pool`` and ``local``.
        When ``local`` is truthy the attachments are passed as bare file
        names instead of ``{"path": ...}`` dicts.

    Reads ``YT_PROXY`` and ``YQL_TOKEN`` from the environment.
    """
    output_table = "{}/{}_{}".format(args["root"], dates[0], dates[-1])

    runner = YqlRunner(
        client=YqlClient(
            db=os.environ["YT_PROXY"].lower(),
            token=os.environ["YQL_TOKEN"],
        ),
        title=TITLE,
    )

    with open("ugc_stats_regular.sql", "rb") as sql_file:
        raw_query = sql_file.read()

    placeholders = {
        "@[pool]": args["pool"],
        "@[root]": args["root"],
        "@[output_table]": output_table,
        "@[date_from]": str(dates[0]),
        "@[date_to]": str(dates[-1]),
    }
    query = apply_replacements(raw_query.decode("utf8"), placeholders)

    attachment_paths = (
        "analytics/videolog/strm-stats/strm_cube_2/ugc_stats_regular/get_ugc_meta.sql",
        "analytics/videolog/strm-stats/strm_cube_2/ugc_stats_regular/ugc_stats.py",
        "analytics/videolog/strm-stats/strm_cube_2/stability/stability_common.sql",
    )
    if args["local"]:
        # Local runs refer to the attachments by file name only.
        attachments = [path.rsplit("/", 1)[-1] for path in attachment_paths]
    else:
        attachments = [{"path": path} for path in attachment_paths]

    runner.run(query, wait=True, attachments=attachments)


def _check_non_empty(yt, table):
    return yt.exists(table) and yt.get_attribute(table, "row_count") > 0


def main():
    """Choose the dates to process, run the query and record what was done.

    Command-line options: ``--pool``, ``--root``, ``--from``, ``--to``,
    ``--output``, ``--local`` and ``--redo`` (``--redo`` is parsed but not
    read anywhere in this function).

    Date selection:

    * if both ``--from`` and ``--to`` are given, the dates are
      ``sorted(date_range(from, to))``;
    * otherwise the window between eight days ago and yesterday is taken
      from ``date_range`` and a date is processed when its node under
      ``ugc_root`` is missing or empty AND a path ending in ``sessions``
      with that date is found under ``cube_root``.

    When there is something to process, ``process_dates`` is called once for
    the whole list and, if ``--output`` was supplied, the processed dates are
    written there as a JSON list of strings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pool", default="robot-mma-nirvana")
    parser.add_argument("--root", default=ROOT)
    parser.add_argument("--from")
    parser.add_argument("--redo", action="store_true")
    parser.add_argument("--to")
    parser.add_argument("--output")
    parser.add_argument("--local", action="store_true")
    args = vars(parser.parse_args())

    from_ = args.get("from")
    to_ = args.get("to")

    cluster = get_cluster(clusters, args)
    yt = get_driver(cluster).client

    if from_ and to_:
        dates = sorted(date_range(from_, to_))
    else:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        weekago = yesterday - datetime.timedelta(days=7)
        # Dates in the window whose result node is absent or has no rows.
        missing_dates = set(
            x for x in date_range(weekago, yesterday) if
            not _check_non_empty(yt, "{}/{}".format(ugc_root, x))
        )
        print(
            "missing dates: {}".format(
                ",".join(sorted(map(str, missing_dates)))
            )
        )

        # Dates for which a "sessions" path is present under cube_root.
        available_dates = set(
            get_date(x)
            for x in yt.search(
                root=cube_root,
                path_filter=lambda x: get_date(x) and x.endswith("sessions"),
            )
            if get_date(x) >= weekago
        )
        print(
            "available dates: {}".format(
                ",".join(sorted(map(str, available_dates)))
            )
        )
        dates = sorted(missing_dates & available_dates)

    if not dates:
        print("no dates to process")
        return

    print("processing {}".format(dates))
    process_dates(dates, args)

    output_path = args["output"]
    if output_path:
        # Use a context manager so the report is flushed and the handle is
        # closed deterministically; previously the file object was leaked.
        with open(output_path, "w") as out:
            json.dump(list(map(str, dates)), out, indent=2)
    # Without --output there is nowhere to write the report; skip it rather
    # than fail on open(None) after the query has already run.


if __name__ == "__main__":
    main()
