#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import yt.wrapper as yt
from datetime import datetime, timedelta
import os
from yql.api.v1.client import YqlClient

geocube_path = "home/geosearch-prod/geocube/1d"
result_path = "home/geoadv/statistics/audience"


def get_tasks():
    """Build the list of (date, source) pairs that still need processing.

    The most recent node name under ``result_path`` is taken as the last
    processed date. If there are no results yet, the cut-off defaults to
    31 days before now, formatted as ``YYYY-MM-DD``. Every node under
    ``geocube_path`` whose name is strictly greater than that cut-off is
    expanded into one task per child node.

    Node names are compared as plain strings; this presumably relies on
    them being ``YYYY-MM-DD`` dates so that lexicographic order matches
    chronological order -- TODO confirm against the actual YT layout.

    Returns:
        list of dicts ``{"date": <date node name>, "source": <child node name>}``,
        ordered by ascending date.
    """
    processed_dates = yt.list("//" + result_path)
    if processed_dates:
        # The newest processed date is the maximum. The previous code sorted
        # in descending order and then pop()-ed the tail, which yielded the
        # *oldest* date and caused already processed dates to be re-queued.
        last_date = max(processed_dates)
    else:
        last_date = (datetime.now() - timedelta(days=31)).strftime('%Y-%m-%d')

    input_dates = sorted(
        date for date in yt.list("//" + geocube_path) if date > last_date
    )

    result = []
    for date in input_dates:
        sources = yt.list("//" + geocube_path + "/" + date)
        result.extend({"date": date, "source": source} for source in sources)
    return result


def proc_date_src(task):
    """Submit the YQL aggregation for a single date/source pair.

    Ensures the per-date map node exists under ``result_path``, then runs
    an ``insert ... with truncate`` query that writes, for every non-null
    ``request_region`` in the source table, the number of distinct
    ``yandexuid`` values (column ``cnt``).

    Args:
        task: dict with string values under the keys ``"date"`` and
            ``"source"``.
    """
    day = task["date"]
    origin = task["source"]
    logging.debug("Processing {date} {source}".format(date=day, source=origin))

    # Same relative suffix is used for both the input and the output table.
    table_suffix = "/" + day + "/" + origin

    yt.create("map_node", "//" + result_path + "/" + day, ignore_existing=True)

    source_table = "hahn.[" + geocube_path + table_suffix + "]"
    target_table = "hahn.[" + result_path + table_suffix + "]"

    logging.debug("Source %s", source_table)
    logging.debug("Target %s", target_table)

    statement = "".join((
        "insert into ",
        target_table,
        " with truncate select count(distinct yandexuid) as cnt, request_region from ",
        source_table,
        " where request_region is not null GROUP BY request_region",
    ))

    # The token is read from the YQL_TOKEN environment variable.
    client = YqlClient(db="hahn", token=os.getenv("YQL_TOKEN"))
    request = client.query(statement)
    request.run()
    print(request)


if __name__ == "__main__":
    def _module_allowed(module):
        """Return False for modules whose name mentions hashlib or urllib3."""
        name = getattr(module, '__name__', '')
        return not ('hashlib' in name or 'urllib3' in name)

    # Installed as the YT pickling module filter; presumably this keeps the
    # rejected modules out of what the wrapper ships with jobs -- TODO confirm.
    yt.config['pickling']['module_filter'] = _module_allowed

    yt.update_config({'pickling': {'python_binary': '/skynet/python/bin/python'}})
    logging.basicConfig(level=logging.DEBUG)

    # Make sure the results root exists before listing it in get_tasks().
    yt.create("map_node", "//" + result_path, ignore_existing=True)

    for pending in get_tasks():
        proc_date_src(pending)
