#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import os
import sys
import argparse
import itertools
import codecs
import datetime
from collections import Counter, defaultdict
from yql.api.v1.client import YqlClient
from videolog_common import (
    zen_ref_from_treatment,
    apply_replacements,
    date_range,
    yt_get_date_from_table as get_date,
    get_dates_from_stat,
    get_stat_headers,
    get_cluster,
    get_driver,
    StatPusher,
)
from nile.api.v1 import clusters

# Title passed as ``title=`` to the YQL requests issued by process_dates().
TITLE = "Selrank Stats | YQL"
# NOTE(review): not referenced anywhere in the visible part of this file;
# main() declares its own default for --job_root. Confirm before removing.
default_jobroot = "//home/videolog/selrank_stats"
# Job root path; main() rebinds this module-level name (via ``global root``)
# to the value of the --job_root argument.
root = None
# Publish tables. PUBLISH_DESKTOP is used when the platform is "desktop",
# PUBLISH_MOBILE for any other platform value.
PUBLISH_DESKTOP = "//home/videolog/selrank_stats/additive_both_fields_wo_threshold_schematized"
PUBLISH_MOBILE = "//home/videolog/selrank_stats/additive_mobile_both_fields_wo_threshold_schematized"


def _read_sql(path):
    """Return the text of the UTF-8 encoded SQL template stored at *path*."""
    with codecs.open(path, "r", "utf8") as f:
        return f.read()


def _run_yql(yql_client, query):
    """Submit *query* via *yql_client* and wait for it to finish.

    Raises:
        Exception: if the request finishes with a status other than
            "COMPLETED"; the message contains the request's share URL.
    """
    req = yql_client.query(query, title=TITLE, syntax_version=1)
    req.run()
    req.wait_progress()
    if req.status != "COMPLETED":
        raise Exception("request {} has failed".format(req.share_url))


def process_dates(
    dates,
    cluster,
    yql_client,
    pool,
    debug=False,
    root=None,
    platform="desktop",
):
    """Pre-aggregate the given dates and merge them into the additive table.

    The work is done in two YQL steps:

    1. Unless the temporary pre-aggregation table already exists, build it
       from ``<platform>_stub.sql`` followed by ``prel_finalize.sql``.
    2. Run ``additive_reduce.sql`` with the temporary, additive and publish
       table paths substituted in.

    Afterwards the temporary table is removed unless *debug* is set.

    Args:
        dates: non-empty ordered sequence of dates to process. Elements must
            support ``min``/``max`` and subtraction of a ``timedelta``
            (presumably ``datetime.date`` objects -- confirm against
            ``date_range``).
        cluster: cluster object accepted by ``get_driver``.
        yql_client: client exposing ``query(query, title=..., syntax_version=...)``.
        pool: value substituted for ``@pool`` in the SQL templates.
        debug: when true, keep the temporary table after processing.
        root: job root path under which the additive and temporary tables live.
        platform: ``"desktop"`` selects the desktop publish table; any other
            value selects the mobile one. Also selects the stub SQL file.

    Raises:
        Exception: if either YQL request does not finish as "COMPLETED".
    """
    if len(dates) == 1:
        date_s = str(dates[0])
    else:
        date_s = "{}_{}".format(dates[0], dates[-1])
    date_from = str(min(dates))
    date_to = str(max(dates))
    # The reduce query receives a cut-off 180 days before the newest date.
    date_threshold = str(max(dates) - datetime.timedelta(days=180))
    additive_table = "{}/additive_{}".format(root, platform)
    tmp_table = "{}/{}_preaggr_{}".format(root, date_s, platform)
    publish_table = (
        PUBLISH_DESKTOP if platform == "desktop" else PUBLISH_MOBILE
    )

    # An existing temporary table is reused as-is, so the pre-aggregation
    # step is skipped when it is already present.
    if not get_driver(cluster).exists(tmp_table):
        query = _read_sql("{}_stub.sql".format(platform))
        query = query + _read_sql("prel_finalize.sql")
        base_replacements = [
            ("@date_from", date_from),
            ("@date_to", date_to),
            ("@pool", pool),
            ("@output_table", tmp_table),
        ]
        _run_yql(yql_client, apply_replacements(query, base_replacements))

    query = _read_sql("additive_reduce.sql")
    base_replacements = [
        ("@pool", pool),
        ("@tmp_table", tmp_table),
        ("@additive_table", additive_table),
        ("@publish_table", publish_table),
        ("@date_threshold", date_threshold),
        ("@date_to", date_to),
    ]
    _run_yql(yql_client, apply_replacements(query, base_replacements))

    if not debug:
        # Cleanup is best-effort: a failed removal is reported but does not
        # fail the run. Catch Exception rather than everything so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            get_driver(cluster).remove(tmp_table)
            print("removed {}".format(tmp_table))
        except Exception:
            print("unable to remove {}".format(tmp_table))


def _read_last_date(cluster, table):
    """Return the date stored in the first row of ``<table>_last_date``.

    The row's ``last_date`` field is converted with ``get_date``.
    """
    rows = list(get_driver(cluster).read(table + "_last_date"))
    return get_date(rows[0].last_date)


def main():
    """Command-line entry point: process new dates and refresh the publish table.

    When both ``--from`` and ``--to`` are given, that date range is processed.
    Otherwise the range starts the day after the date recorded in
    ``<additive_table>_last_date`` and ends at the newest table found in the
    platform's log directory, capped at yesterday.

    After processing, if the publish table's recorded date is older than the
    additive table's, the additive table is mapped into the publish table and
    the publish ``_last_date`` table is rewritten.

    Reads the ``YT_PROXY`` and ``YQL_TOKEN`` environment variables and rebinds
    the module-level ``root`` to ``--job_root``.

    Raises:
        Exception: if the log directory contains no tables, or if the final
            YQL request does not finish as "COMPLETED".
    """
    global root
    parser = argparse.ArgumentParser()
    parser.add_argument("--from", default=None)
    parser.add_argument("--to", default=None)
    parser.add_argument("--pool", default=None)
    parser.add_argument(
        "--job_root", default="//home/videolog/selrank_stats_beta"
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--replace_mask", default=None)
    parser.add_argument("--title", default=TITLE)
    parser.add_argument("--platform", default="desktop")
    args = parser.parse_args()
    args.templates = {"tmp_root": args.job_root}

    cluster = get_cluster(clusters, args)
    yql_client = YqlClient(
        db=os.environ["YT_PROXY"], token=os.environ["YQL_TOKEN"]
    )

    root = args.job_root
    platform = args.platform

    # "from" is a Python keyword, so the attribute cannot be read as args.from.
    from_ = getattr(args, "from")
    to_ = getattr(args, "to")

    publish_table = (
        PUBLISH_DESKTOP if platform == "desktop" else PUBLISH_MOBILE
    )
    additive_table = "{}/additive_{}".format(root, platform)

    if from_ and to_:
        dates = sorted(date_range(from_, to_))
    else:
        logs_dir = (
            "//logs/bar-navig-log/1d"
            if platform == "desktop"
            else "//home/logfeller/logs/browser-metrika-mobile-log/1d"
        )
        yt = get_driver(cluster).client
        # Take "today" once so every comparison below uses the same day.
        today = datetime.date.today()
        last_date = _read_last_date(cluster, additive_table) or get_date(
            "2019-03-19"
        )
        if last_date >= today:
            # A recorded date of today or later cannot be right; fall back
            # to two days ago.
            new_last_date = today - datetime.timedelta(days=2)
            print(
                "date {} is broken, setting it to {}".format(
                    last_date, new_last_date
                )
            )
            last_date = new_last_date
        print("last date from table attribute: {}".format(last_date))
        log_tables = sorted(yt.search(root=logs_dir, node_type="table"))
        if not log_tables:
            # Fail with a clear message instead of an IndexError below.
            raise Exception("no log tables found under {}".format(logs_dir))
        # Never go past yesterday, even if a newer log table is listed.
        last_available_date = min(
            get_date(log_tables[-1]),
            today - datetime.timedelta(days=1),
        )
        print("last date from tables: {}".format(last_available_date))
        if last_date == last_available_date:
            print("no tables to process, exiting")
            sys.exit(0)
        dates = sorted(
            date_range(
                last_date + datetime.timedelta(days=1), last_available_date
            )
        )

    print("processing {}".format(dates))
    if dates:
        process_dates(
            dates,
            cluster,
            yql_client,
            args.pool,
            debug=args.debug,
            root=root,
            platform=platform,
        )

    # Re-read both markers after processing to decide whether the publish
    # table lags behind the additive one.
    last_date_publish = _read_last_date(cluster, publish_table)
    last_date = _read_last_date(cluster, additive_table) or get_date(
        "2019-03-19"
    )
    if last_date_publish < last_date:
        job = cluster.job().env(yt_spec_defaults={"cpu_limit": 0.3})
        # NOTE(review): renamer_mapper is neither defined nor imported in the
        # visible part of this file -- confirm it is in scope, otherwise this
        # line raises NameError.
        job.table(additive_table).map(renamer_mapper).put(publish_table)
        job.run()

        req = yql_client.query(
            'insert into `{}` with truncate\nselect "{}" as last_date'.format(
                publish_table + "_last_date", last_date
            )
        )
        req.run()
        req.wait_progress()
        if req.status != "COMPLETED":
            raise Exception("request {} has failed".format(req.share_url))


# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
