#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import datetime
import argparse
import yt.wrapper as yt
from yql.api.v1.client import YqlClient
from videolog_common import YqlRunner, apply_replacements, get_date, date_range


# Default value of the --root option: the YT node whose children are listed
# to find already-processed dates, and which is substituted into the query
# as @[output_root].
OUTPUT_ROOT = "//home/videoquality/vh_analytics/mma-3568-tele2/reduced"

# YT path templates of the inputs. A date is only picked up automatically
# once every one of these paths exists for that date ({date} is filled in
# with the candidate date).
SOURCES = (
    "//logs/strm-perf-log/1d/{date}",
    "//logs/appmetrica-events-log/browser-metrika-mobile-log/1d/{date}",
    "//logs/strm-gogol-log/1d/{date}",
    "//logs/appmetrica-location-log/1d/{date}",
    "//cubes/video-strm/{date}/sessions",
)


def process_dates(date_from, date_to, args):
    """Render the ``prepare_data_new.sql`` template for a date span and run it.

    Args:
        date_from: Value substituted (via ``str``) for ``@[date_from]``.
        date_to: Value substituted (via ``str``) for ``@[date_to]``.
        args: Mapping providing the ``"pool"`` and ``"root"`` entries that
            fill ``@[pool]`` and ``@[output_root]``.

    Raises:
        KeyError: If the ``YQL_TOKEN`` environment variable is not set, or
            ``args`` lacks ``"pool"`` / ``"root"``.
    """
    print("processing dates from {} to {}...".format(date_from, date_to))
    runner = YqlRunner(
        YqlClient(token=os.environ["YQL_TOKEN"]),
        title="v2 runner | YQL",
    )

    # The template path is relative, so it is resolved against the current
    # working directory.
    with codecs.open("prepare_data_new.sql", "r", "utf8") as template_file:
        template = template_file.read()

    placeholders = [
        ("@[date_from]", str(date_from)),
        ("@[date_to]", str(date_to)),
        ("@[cluster]", "hahn"),
        ("@[pool]", args["pool"]),
        ("@[parallel_operations_limit]", "8"),
        ("@[output_root]", args["root"]),
    ]
    rendered_query = apply_replacements(template, placeholders)

    # Files handed to the runner together with the query.
    attached_files = [
        "helpers.sql",
        "new_algo_reducer.py",
        "sphere_v2.py",
        {
            "path": "analytics/videolog/strm-stats/strm_cube_2/stability/stability_common.sql"
        },
    ]
    runner.run(rendered_query, attachments=attached_files)


def get_dates_to_process(args):
    """Return the sorted dates that still need processing and have all inputs.

    The children of ``args["root"]`` are listed on YT and passed through
    ``get_date``; children for which it returns a falsy value are ignored.
    Candidate dates come from ``date_range`` called with the day after the
    latest ready date and yesterday. A candidate is kept only if every path
    in ``SOURCES`` exists for it.

    Args:
        args: Mapping providing the ``"root"`` entry (YT path of the output
            directory).

    Returns:
        Sorted list of dates to process; empty when the output is already
        up to date or no candidate has all of its sources.

    Raises:
        ValueError: If no child of ``args["root"]`` yields a date, so there
            is no starting point for the range.
    """
    # Parse each node name once and drop the ones that are not dates.
    parsed = (get_date(name) for name in yt.list(args["root"]))
    ready_dates = [d for d in parsed if d]
    if not ready_dates:
        raise ValueError(
            "no dated nodes found under {!r}; cannot determine "
            "where to start".format(args["root"])
        )

    last_ready = max(ready_dates)
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    if yesterday > last_ready:
        init_range = date_range(last_ready + datetime.timedelta(days=1), yesterday)
    else:
        init_range = []
    print(f"init range: {init_range}")

    # A generator lets all() stop at the first missing source, which avoids
    # needless yt.exists round-trips.
    available_dates = sorted(
        d for d in init_range
        if all(yt.exists(x.format(date=d)) for x in SOURCES)
    )
    print(f"available dates: {available_dates}")
    return available_dates


def split_to_bunches(dates, bunch_size=3):
    """Split ``dates`` into consecutive chunks of at most ``bunch_size`` items.

    Args:
        dates: Sliceable sequence to split; a falsy value (empty sequence or
            ``None``) yields an empty result.
        bunch_size: Maximum number of items per chunk; must be at least 1.

    Returns:
        List of slices of ``dates`` in original order; only the last chunk
        may be shorter than ``bunch_size``.

    Raises:
        ValueError: If ``bunch_size`` is less than 1. Such a value would
            otherwise never consume the input and loop forever.
    """
    if bunch_size < 1:
        raise ValueError("bunch_size must be at least 1, got {!r}".format(bunch_size))
    if not dates:
        return []
    return [
        dates[start:start + bunch_size]
        for start in range(0, len(dates), bunch_size)
    ]


def main():
    """Parse the command line and run the processing.

    With both ``--from`` and ``--to`` the given span is processed in a single
    run. With neither, the dates returned by ``get_dates_to_process`` are
    split into bunches and each bunch is processed from its first to its
    last date. Supplying only one of the two bounds is rejected.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--from")
    parser.add_argument("--to")
    parser.add_argument("--pool", default="robot-mma-nirvana")
    parser.add_argument("--root", default=OUTPUT_ROOT)
    args = vars(parser.parse_args())

    date_from, date_to = args["from"], args["to"]
    # A lone bound would otherwise be paired with None, which ends up in the
    # query as the literal text "None".
    if bool(date_from) != bool(date_to):
        parser.error("--from and --to must be given together")

    if date_from and date_to:
        process_dates(date_from, date_to, args)
        return

    for bunch in split_to_bunches(get_dates_to_process(args)):
        process_dates(bunch[0], bunch[-1], args)


# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
