#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import argparse
import datetime
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    apply_replacements,
    date_range,
    get_cluster,
    get_driver,
    get_stat_headers,
    get_dates_from_stat,
    get_date,
    StatPusher,
    YqlRunner
)

# Root path used to build the output table names when --job_root is not given.
DEFAULT_JOB_ROOT = "//home/videolog/rebufferings"

# Title handed to YqlRunner for every query this job submits.
TITLE = "Rebufferings | YQL"

# Per-platform log directory that is searched for dated tables when the date
# range has to be derived automatically.
LOGS = {
    "desktop": "//logs/bar-navig-log/1d",
    "mobile": "//home/logfeller/logs/browser-metrika-mobile-log/1d",
}


def process_dates(dates, cluster, args):
    """Run the platform query for one chunk of dates and push the result.

    The query text is read from ``rebufferings_<platform>.sql`` (relative to
    the current working directory), its ``@[...]`` placeholders are filled in
    and it is executed through ``YqlRunner``. Unless ``args["debug"]`` is
    set, the raw output table is removed afterwards. The grouped table is
    then pushed to the report named by ``args["report"]``.

    Args:
        dates: non-empty sequence; its first and last items become the
            ``@[date_from]`` / ``@[date_to]`` replacements.
        cluster: cluster object passed to ``get_driver`` and ``StatPusher``.
        args: dict with the keys ``job_root``, ``platform``, ``pool``,
            ``debug`` and ``report``.
    """
    platform = args["platform"]
    job_root = args["job_root"]
    raw_table = "{}/{}_raw".format(job_root, platform)
    grouped_table = "{}/{}_grouped".format(job_root, platform)

    # YT_PROXY is read before YQL_TOKEN, so a missing variable fails in the
    # same order as before.
    client = YqlClient(
        db=os.environ["YT_PROXY"], token=os.environ["YQL_TOKEN"]
    )
    runner = YqlRunner(client, title=TITLE)

    sql_path = "rebufferings_{}.sql".format(platform)
    with open(sql_path, "r") as sql_file:
        template = sql_file.read()

    replacements = {
        "@[pool]": args["pool"],
        "@[output_table_raw]": raw_table,
        "@[output_table_grouped]": grouped_table,
        "@[date_from]": str(dates[0]),
        "@[date_to]": str(dates[-1]),
    }
    runner.run(apply_replacements(template, replacements))

    # The raw table is only kept around for debugging runs.
    keep_raw_table = args["debug"]
    if not keep_raw_table:
        get_driver(cluster).remove(raw_table)

    report = args["report"]
    pusher = StatPusher(cluster, report=report)
    print("pushing to {}".format(report))
    pusher.push(grouped_table)

def main():
    """Parse the command line, pick the dates to process and run them.

    When both ``--from`` and ``--to`` are given, that range is used.
    Otherwise the range starts one day after the last entry returned by
    ``get_dates_from_stat`` for the report and ends at the latest
    ``get_date`` value among the tables found under the platform's log
    directory; if the logs are not ahead of the report there is nothing to
    do.

    Dates are processed in chunks of up to three, newest chunk first, and
    each chunk is handed to ``process_dates`` in ascending order.

    An unsupported ``--platform`` is rejected by argparse (usage message,
    exit status 2) before any cluster access happens.
    """
    default_reports = {
        "desktop": "Video/Others/Strm/Stability/RebufferingsDesktop",
        "mobile": "Video/Others/Strm/Stability/Rebufferings",
    }

    parser = argparse.ArgumentParser()
    parser.add_argument("--pool", default="robot-mma-nirvana")
    # Validate with ``choices`` rather than ``assert``: assertions are
    # stripped under ``python -O`` and must not guard user input.
    parser.add_argument(
        "--platform", required=True, choices=("desktop", "mobile")
    )
    parser.add_argument("--report")
    parser.add_argument("--from")
    parser.add_argument("--job_root")
    parser.add_argument("--to")
    parser.add_argument("--debug", action="store_true")
    args = vars(parser.parse_args())

    from_ = args.get("from")
    to_ = args.get("to")

    cluster = get_cluster(clusters, args)
    yt = get_driver(cluster).client

    if not args.get("report"):
        args["report"] = default_reports[args["platform"]]
    if not args.get("job_root"):
        args["job_root"] = DEFAULT_JOB_ROOT

    # NOTE: if only one of --from/--to is supplied it is ignored and the
    # range is derived from the report and the available logs instead.
    if from_ and to_:
        # Newest first; sorted() also materialises whatever date_range
        # returns so the emptiness check below is reliable.
        dates = sorted(date_range(from_, to_), reverse=True)
    else:
        dates_from_stat = get_dates_from_stat(
            report=args["report"], headers=get_stat_headers()
        )
        last_date = dates_from_stat[-1]
        last_available_date = max(
            get_date(path) for path in yt.search(
                root=LOGS[args["platform"]],
                node_type="table",
                path_filter=lambda path: bool(get_date(path))
            )
        )
        if last_available_date > last_date:
            first_missing = last_date + datetime.timedelta(days=1)
            dates = sorted(
                date_range(first_missing, last_available_date), reverse=True
            )
        else:
            dates = []

    if not dates:
        print("no dates to process")
        return

    # Slice the newest-first list into chunks, then put each chunk back
    # into ascending order for process_dates.
    chunk_size = 3
    chunks = [
        sorted(dates[start:start + chunk_size])
        for start in range(0, len(dates), chunk_size)
    ]
    for chunk in chunks:
        print("processing {}".format(chunk))
        process_dates(chunk, cluster, args)


# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
