#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import os
import argparse
import datetime
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    apply_replacements,
    date_range,
    get_cluster,
    get_driver,
    get_stat_headers,
    get_dates_from_stat,
    get_date,
    StatPusher,
    YqlRunner
)

# Default YT directory under which the per-date output tables are created.
DEFAULT_JOB_ROOT = "//home/videolog/externalPlayerEvents"
# Title passed to YqlRunner for every query started by this script.
TITLE = "External Player Events | YQL-CHYT"
# Query template, filled in with str.format():
#   {table}     - path of the output table to create;
#   {fielddate} - date, used both as the `fielddate` column value and as the
#                 name of the daily table under //logs/jstracer-log/1d.
# For rows with Service == 'StreamPlayer' and EventName like 'External%' it
# produces, per EventName: the row count, uniq(Yandexuid), and the share of
# rows whose Yandexuid is an empty string.
QUERY_STUB = """
use chyt.hahn;

CREATE TABLE "{table}" ENGINE = YtTable()
AS
select
'{fielddate}' as fielddate,
EventName as event_name,
count(*) as `count`,
uniq(Yandexuid) as users,
countIf(Yandexuid == '') / count(*) as empty_yandexuid_share
from "//logs/jstracer-log/1d/{fielddate}"
where Service == 'StreamPlayer' and EventName like 'External%'
group by EventName
order by `count` desc
"""


def process_dates(dates, cluster, args):
    """Build and publish the external player events table for each date.

    For every date the function formats ``QUERY_STUB``, removes the output
    table if it already exists, runs the query through ``YqlRunner`` with
    ``query_type="CLICKHOUSE"`` and pushes the resulting table with
    ``StatPusher``.

    Args:
        dates: iterable of dates; each value is formatted with
            ``str.format`` into the source log path and the output table
            name.
        cluster: cluster object passed to ``get_driver`` and
            ``StatPusher``.
        args: dict of parsed command-line options. ``args["report"]`` is
            required; ``args["job_root"]`` is optional and falls back to
            ``DEFAULT_JOB_ROOT`` when missing or empty.

    Raises:
        KeyError: when ``dates`` is non-empty and either ``YQL_TOKEN`` is
            not set in the environment or ``args`` has no ``"report"`` key.
    """
    # Honour --job_root: the output used to be written under
    # DEFAULT_JOB_ROOT unconditionally, so the option had no effect.
    job_root = args.get("job_root") or DEFAULT_JOB_ROOT
    for date in dates:
        print("processing {}".format(date))
        output_table = "{}/{}".format(job_root, date)
        yc = YqlClient(token=os.environ["YQL_TOKEN"])
        # NOTE(review): presumably clears the client's default database so
        # the CHYT query is not bound to one -- confirm against YqlClient.
        yc.config.db = None
        assert yc.config.db is None
        yr = YqlRunner(yc, title=TITLE)
        query = QUERY_STUB.format(
            fielddate=date,
            table=output_table
        )
        driver = get_driver(cluster)
        # The query creates the table itself, so a leftover table from a
        # previous run is dropped first.
        if driver.exists(output_table):
            print("table {} already exists, removing...".format(output_table))
            driver.remove(output_table)
        yr.run(query, query_type="CLICKHOUSE")
        sp = StatPusher(cluster, report=args["report"])
        print("pushing to {}".format(args["report"]))
        sp.push(output_table)


def main():
    """Parse command-line options, pick the dates to process and run them.

    When both ``--from`` and ``--to`` are given, the dates come from
    ``date_range(from, to)``. Otherwise the range starts on the day after
    the last date returned by ``get_dates_from_stat`` for the report and
    ends on the latest dated table found under ``//logs/jstracer-log/1d``;
    if that latest table is not newer than the last reported date, nothing
    is processed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pool", default="robot-mma-nirvana")
    parser.add_argument(
        "--report", default="Video/Others/Strm/ExternalPlayerEvents"
    )
    parser.add_argument("--from")
    parser.add_argument("--job_root")
    parser.add_argument("--to")
    args = vars(parser.parse_args())

    range_start = args.get("from")
    range_end = args.get("to")

    cluster = get_cluster(clusters, args)
    yt_client = get_driver(cluster).client

    # An absent or empty --job_root falls back to the default root.
    args["job_root"] = args.get("job_root") or DEFAULT_JOB_ROOT

    if range_start and range_end:
        dates = sorted(date_range(range_start, range_end))
    else:
        reported_dates = get_dates_from_stat(
            report=args["report"], headers=get_stat_headers()
        )
        last_reported = reported_dates[-1]

        # Only tables for which get_date() yields a truthy value count.
        log_tables = yt_client.search(
            root="//logs/jstracer-log/1d",
            node_type="table",
            path_filter=lambda path: bool(get_date(path))
        )
        available_dates = sorted(get_date(path) for path in log_tables)
        newest_available = available_dates[-1]

        dates = []
        if newest_available > last_reported:
            first_missing = last_reported + datetime.timedelta(days=1)
            dates = sorted(date_range(first_missing, newest_available))

    if not dates:
        print("no dates to process")
        return
    process_dates(dates, cluster, args)


# Run the job only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
