#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import argparse
import datetime
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    apply_replacements,
    date_range,
    get_cluster,
    get_driver,
    get_stat_headers,
    get_dates_from_stat,
    get_date,
    StatPusher,
    YqlRunner,
)

# Default value of the --input_root option; in automatic mode main() searches
# this root for tables to find the last available date.
LOG_ROOT = "//home/videoquality/vh_analytics/ad_squeeze"
# Default value of the --title option; main() appends " | YQL" to the title
# before it is handed to YqlRunner.
TITLE = "[MMA-4767] Ad Cube Regular"
# Query run by get_schedule_last_date(): selects the maximum report_date of
# the schedule stats table, aliased as `fielddate`.
SCHEDULE_DATE_STUB = """
select max(report_date) as fielddate
from `//home/comdep-analytics/YAN/cubes/ado/stats/schedule_resource_stats`
"""


def get_schedule_last_date(args):
    """Return the latest report date found in the schedule stats table.

    Runs SCHEDULE_DATE_STUB through a YqlRunner built from the YT_PROXY and
    YQL_TOKEN environment variables, takes the ``fielddate`` field of the
    first result row, decodes it from UTF-8 and converts it with get_date().

    :param args: option mapping; only ``args["title"]`` is read (used as the
        YqlRunner title).
    :return: whatever get_date() returns for the decoded ``fielddate`` value.
    :raises KeyError: if YT_PROXY or YQL_TOKEN is missing from the
        environment.
    """
    yql_client = YqlClient(
        db=os.environ["YT_PROXY"].lower(), token=os.environ["YQL_TOKEN"]
    )
    runner = YqlRunner(client=yql_client, title=args["title"])

    request = runner.run(SCHEDULE_DATE_STUB)
    first_row = runner.get_results(request)[0]
    # The value is decoded before parsing, i.e. it is expected to arrive as
    # a byte string.
    raw_date = first_row["fielddate"].decode("utf8")
    return get_date(raw_date)


def process_dates(dates, cluster, args):
    """Run the cube query for one span of dates and optionally push it to stat.

    The SQL stub named by ``args["query_stub"]`` is read, its ``@[...]``
    placeholders are filled in, the ``/*<mode>`` / ``<mode>*/`` markers of
    every enabled mode are stripped, and the resulting query is executed with
    ``wait=True``. When ``stat_push`` is enabled the output table is then
    pushed with StatPusher.

    :param dates: non-empty sequence of dates; only the first and the last
        element are used (output table name and the date_from/date_to
        placeholders).
    :param cluster: cluster object handed to StatPusher.
    :param args: option mapping; reads ``title``, ``query_stub``, ``root``,
        ``pool``, ``input_root``, ``report``, ``output_mode`` and ``redo``.
    :raises KeyError: if YT_PROXY or YQL_TOKEN is missing from the
        environment.
    """
    yql_client = YqlClient(
        db=os.environ["YT_PROXY"].lower(), token=os.environ["YQL_TOKEN"]
    )
    runner = YqlRunner(client=yql_client, title=args["title"])

    with open(args["query_stub"], "rb") as stub_file:
        raw_stub = stub_file.read()
    query = raw_stub.decode("utf8")

    first_date, last_date = dates[0], dates[-1]
    output_table = "{}/{}_{}".format(args["root"], first_date, last_date)

    placeholders = {
        "@[pool]": args["pool"],
        "@[output_table]": output_table,
        "@[input_root]": args["input_root"],
        "@[date_from]": str(first_date),
        "@[date_to]": str(last_date),
        "@[report]": args["report"],
    }
    query = apply_replacements(query, placeholders)

    for mode in ("output", "upsert"):
        # output_mode is a plain string, so this is a substring test.
        if mode not in args["output_mode"]:
            continue
        # Dropping both markers presumably un-comments the section of the
        # stub that is wrapped in /*<mode> ... <mode>*/ -- confirm against
        # the stub file.
        markers = {"{}*/".format(mode): "", "/*{}".format(mode): ""}
        query = apply_replacements(query, markers)

    attachment_paths = (
        "arcadia/analytics/videolog/strm-stats/strm_cube_2/stability/stability_common.sql",
        "arcadia/analytics/videolog/strm-stats/mma_4767_ad_cube/lib_humans_0.sql",
        "arcadia/analytics/videolog/strm-stats/mma_4767_ad_cube/lib_humans_1.sql",
    )
    runner.run(
        query,
        wait=True,
        attachments=[{"path": path} for path in attachment_paths],
    )

    if "stat_push" not in args["output_mode"]:
        return

    pusher = StatPusher(cluster, report=args["report"])
    print("pushing to {}".format(args["report"]))
    # With --redo the push is given replace_mask="fielddate".
    if args.get("redo"):
        push_options = {"replace_mask": "fielddate"}
    else:
        push_options = {}
    pusher.push(output_table, **push_options)


def main():
    """Parse the command line, decide which dates to build and process them.

    Date selection:

    * If both ``--from`` and ``--to`` are given, the range
      ``date_range(from, to)`` is processed.
    * Otherwise the range runs from the day after the last date present in
      the stat report up to the last date for which a table exists under
      ``--input_root``. For reports whose name contains ``DayBudget`` the
      upper bound is additionally capped by get_schedule_last_date().

    The selected dates are split into chunks of at most three days, counted
    from the newest date, and each chunk is passed to process_dates().

    :raises RuntimeError: in automatic mode, when the stat report contains no
        dates or no dated table is found under ``--input_root``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pool", default="robot-mma-nirvana")
    parser.add_argument(
        "--root", default="//home/videoquality/vh_analytics/ad_cube"
    )
    parser.add_argument("--report", default="Video/Others/VideoAdCube2")
    parser.add_argument("--input_root", default=LOG_ROOT)
    parser.add_argument("--query_stub", default="ad_cube_stub.sql")
    parser.add_argument("--output_mode", default="output,stat_push")
    parser.add_argument("--from")
    parser.add_argument("--title", default=TITLE)
    parser.add_argument("--redo", action="store_true")
    parser.add_argument("--to")
    args = vars(parser.parse_args())
    args["title"] += " | YQL"

    # "from" is a Python keyword, so the values are read from the dict
    # rather than as namespace attributes.
    from_ = args.get("from")
    to_ = args.get("to")

    cluster = get_cluster(clusters, args)
    yt = get_driver(cluster).client

    if from_ and to_:
        dates = sorted(date_range(from_, to_))
    else:
        if from_ or to_:
            # A lone bound was previously dropped without any notice; the
            # automatic fallback is kept, but the user is now told about it.
            sys.stderr.write(
                "warning: --from and --to must be given together; "
                "ignoring the single bound and detecting dates "
                "automatically\n"
            )
        dates_from_stat = get_dates_from_stat(
            report=args["report"], headers=get_stat_headers()
        )
        if not dates_from_stat:
            # Without this check the [-1] below fails with a bare IndexError.
            raise RuntimeError(
                "report {} has no dates yet; pass --from and --to "
                "explicitly".format(args["report"])
            )
        # NOTE(review): takes the last element as the most recent date, i.e.
        # assumes get_dates_from_stat() returns ascending dates -- confirm.
        last_date = dates_from_stat[-1]
        print("last date: {}".format(last_date))
        available_dates = [
            get_date(x)
            for x in yt.search(
                root=args["input_root"],
                node_type="table",
                path_filter=lambda x: bool(get_date(x)),
            )
        ]
        if not available_dates:
            raise RuntimeError(
                "no dated tables found under {}".format(args["input_root"])
            )
        last_available_date = max(available_dates)
        if "DayBudget" in args["report"]:
            # DayBudget reports cannot go past the newest date present in
            # the schedule stats table.
            last_available_date_from_schedule = get_schedule_last_date(args)
            last_available_date = min(
                last_available_date, last_available_date_from_schedule
            )
        print("last available date: {}".format(last_available_date))
        if last_available_date > last_date:
            dates = sorted(
                date_range(
                    last_date + datetime.timedelta(days=1), last_available_date
                )
            )
        else:
            dates = []

    if not dates:
        print("no dates to process")
        return

    # Chunks are cut starting from the newest date, so a short remainder
    # chunk (if any) holds the oldest dates; chunks run newest first, while
    # the dates inside each chunk are ascending.
    n = 3
    newest_first = sorted(dates, reverse=True)
    chunks = [
        sorted(newest_first[start:start + n])
        for start in range(0, len(newest_first), n)
    ]
    for chunk in chunks:
        print("processing {}".format(chunk))
        process_dates(chunk, cluster, args)


# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
