#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import argparse
import datetime
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    apply_replacements,
    date_range,
    get_cluster,
    get_driver,
    get_stat_headers,
    get_dates_from_stat,
    get_date,
    StatPusher,
    YqlRunner,
)

# Default title: used as the default of the --title CLI option and as the
# title handed to YqlRunner in process_date.
TITLE = "[CPMADV-122] Categories Dash | YQL"

# Roots passed to yt.search when looking for dated paths (see get_max_date
# and get_available_dates). Presumably YT paths of the source tables the
# report depends on -- TODO confirm.
log_roots = [
    "//cooked_logs/bs-chevent-cooked-log/1d",
    "//home/videoquality/vh_analytics/targeting",
    "//home/videoquality/vh_analytics/phrases_stat"
]

def get_max_date(args, yt):
    """Return the earliest of the per-root latest dates over ``log_roots``.

    For every root the paths reported by ``yt.search`` (restricted to those
    for which ``get_date`` yields a truthy value) are reduced to their
    maximum date; the minimum of these maxima is returned and each
    intermediate value is printed.

    Args:
        args: Parsed CLI arguments. Not used here; kept so existing callers
            keep working.
        yt: Client object exposing ``search(root=..., path_filter=...)``.

    Returns:
        The smallest of the per-root maximum dates, as produced by
        ``get_date``.

    Raises:
        ValueError: If a root contains no path with a parseable date.
    """
    max_dates = []
    for root in log_roots:
        dated_paths = yt.search(root=root, path_filter=lambda x: bool(get_date(x)))
        # default=None turns the opaque "max() arg is an empty sequence"
        # into an error that names the offending root.
        max_date = max((get_date(path) for path in dated_paths), default=None)
        if max_date is None:
            raise ValueError(f"no dated paths found under {root}")
        print(f"{root} max date: {max_date}")
        max_dates.append(max_date)
    result = min(max_dates)
    print(f"max available date: {result}")
    return result


def process_date(date, args):
    """Run the categories query for one date through YQL.

    The query text is read from ``cpmadv_122_categories_stub.sql`` (resolved
    against the current working directory), the ``@[pool]`` and ``@[date]``
    placeholders are substituted via ``apply_replacements`` and the result is
    submitted with ``wait=True``.

    Args:
        date: Date to process; its ``str()`` form replaces ``@[date]``.
        args: Mapping of parsed CLI arguments. ``args["pool"]`` is required;
            ``args["title"]`` is used as the runner title when present and
            non-empty, otherwise ``TITLE`` is used.

    Raises:
        KeyError: If ``YT_PROXY`` or ``YQL_TOKEN`` is missing from the
            environment, or ``args`` has no ``"pool"`` entry.
    """
    proxy = os.environ["YT_PROXY"]
    yc = YqlClient(db=proxy.lower(), token=os.environ["YQL_TOKEN"])
    # Honor the --title option instead of silently ignoring it; the option
    # defaults to TITLE, so runs without --title behave as before.
    yr = YqlRunner(client=yc, title=args.get("title") or TITLE)
    # Read as bytes and decode explicitly so the result does not depend on
    # the locale's default encoding or on newline translation.
    with open("cpmadv_122_categories_stub.sql", "rb") as f:
        query = f.read().decode("utf8")
    query = apply_replacements(
        query,
        {
            "@[pool]": args["pool"],
            "@[date]": str(date),
        },
    )
    print(f"processing {date}...")
    yr.run(
        query,
        wait=True,
    )


def get_available_dates(yt):
    """Return the set of dates found under every root in ``log_roots``.

    For each root, ``yt.search`` results are mapped through ``get_date`` and
    falsy results are dropped; the per-root sets are then intersected.

    Args:
        yt: Client object exposing ``search(root)``.

    Returns:
        The intersection of the per-root date sets, or ``None`` when
        ``log_roots`` is empty.
    """
    common = None
    for root in log_roots:
        # filter() keeps paths whose get_date() is truthy, map() converts them.
        dates_here = set(map(get_date, filter(get_date, yt.search(root))))
        common = dates_here if common is None else common & dates_here
    return common


def next_wd(date, wd=0):
    """Return the first day on or after *date* that falls on weekday *wd*.

    Args:
        date: Object with ``weekday()`` that supports adding a ``timedelta``.
        wd: Target weekday in ``date.weekday()`` numbering (0 is Monday).

    Returns:
        *date* shifted forward by zero to six days for a *wd* in 0..6.
    """
    offset = wd - date.weekday()
    if offset < 0:
        # Target weekday already passed this week: wrap into the next one.
        offset += 7
    return date + datetime.timedelta(days=offset)


def make_date_chunks(dates):
    """Split the span covered by *dates* into whole Monday-Sunday weeks.

    Only the minimum and maximum of *dates* matter: every week that starts
    on a Monday on or after the minimum and ends on a Sunday on or before
    the maximum is included.

    Args:
        dates: Iterable of comparable date objects; may be empty or a
            one-shot iterator.

    Returns:
        A list of ``(monday, sunday)`` tuples, latest week first. Empty when
        *dates* is empty or spans no complete week.
    """
    # Materialize once: min() followed by max() would otherwise exhaust a
    # one-shot iterator, and both raise ValueError on empty input.
    dates = list(dates)
    if not dates:
        return []
    min_date = min(dates)
    max_date = max(dates)
    week = datetime.timedelta(days=7)
    result = []
    mon = next_wd(min_date, 0)
    while mon <= max_date:
        sun = next_wd(mon, 6)
        # Skip a trailing partial week whose Sunday lies past the range.
        if sun <= max_date:
            result.append((mon, sun))
        mon += week
    result.reverse()
    return result


def main():
    """Parse the CLI options, decide which dates to process and run them.

    When both ``--from`` and ``--to`` are given, the dates come straight
    from ``date_range(from, to)``. Otherwise the range starts the day after
    the last date returned by ``get_dates_from_stat`` for the report and
    ends at the last available date: ``--fake_last_date`` if set, else the
    result of ``get_max_date``. A last available date equal to today is
    replaced by yesterday.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--pool", default="robot-mma-nirvana")
    cli.add_argument("--title", default=TITLE)
    cli.add_argument("--from")
    cli.add_argument("--fake_last_date")
    cli.add_argument("--redo", action="store_true")
    cli.add_argument("--to")
    args = vars(cli.parse_args())

    from_, to_ = args.get("from"), args.get("to")

    # The YT client is created up front in both modes.
    yt = get_driver(get_cluster(clusters, args)).client

    if from_ and to_:
        dates = date_range(from_, to_)
    else:
        fake_last = args["fake_last_date"]
        last_available_date = get_date(fake_last) if fake_last else get_max_date(args, yt)

        today = datetime.date.today()
        if last_available_date == today:
            last_available_date = today - datetime.timedelta(days=1)

        stat_dates = get_dates_from_stat(
            get_stat_headers(), "Video/Others/CPMADV-122/categories_money_report"
        )
        last_date = stat_dates[-1]
        print(f"last available date: {last_available_date}")
        print(f"last date: {last_date}")

        dates = []
        if last_available_date > last_date:
            first_missing = last_date + datetime.timedelta(days=1)
            dates = sorted(date_range(first_missing, last_available_date))

    if not dates:
        print("no dates to process")
        return
    for date in dates:
        process_date(date, args)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
