# -*- coding: utf-8 -*-

from datetime import datetime, timedelta
import json
import logging

import sandbox.sdk2 as sdk2
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.common.yql import run_batch
from sandbox.projects.Sovetnik.Filter.TrainingPool import queries
from sandbox.projects.Sovetnik.Filter.YandexuidFactors import del_tables


class SovetnikCollectTrainingPool(sdk2.Task):
    """
        Collect the Sovetnik training pool.

        Workflow (see ``on_execute``):
          1. ``init``            - list existing YT tables, resolve the date range;
          2. ``prepare``         - decide which daily tables to build and drop
                                   outdated daily/final tables;
          3. ``create_requests`` - render YQL queries for the daily tables
                                   (first batch) and the final table (second batch);
          4. both batches are handed to ``run_batch`` one after another.
    """
    class Parameters(sdk2.Parameters):

        # Sampling fractions substituted into the daily query.
        part_show = sdk2.parameters.Float('доля показов (вещественное число от 0 до 1)', default_value=0.025)
        part_click = sdk2.parameters.Float('доля кликов (вещественное число от 0 до 1)', default_value=0.125)
        part_cart = sdk2.parameters.Float('доля корзин (вещественное число от 0 до 1)', default_value=1)

        # Root directory; the task works with its `daily` and `final` children.
        result_dir = sdk2.parameters.String('директория результата', default_value='//home/geosearch/sovetnik/pool')
        # Retention in days, applied to table names parsed as dates (see `_expired_tables`).
        ttl_daily = sdk2.parameters.Integer('Время хранения дневных таблиц', default_value=14)
        ttl_final = sdk2.parameters.Integer('Время хранения итоговых таблиц', default_value=14)

        # When set, every table found in the corresponding directory is removed.
        delete_daily = sdk2.parameters.Bool('Очистить директорию daily', default_value=False)
        delete_final = sdk2.parameters.Bool('Очистить директорию final', default_value=False)
        # True: log_from/log_to are day offsets from today; False: absolute YYYY-MM-DD dates.
        format_result_table = sdk2.parameters.Bool('Даты результата в формате от сегодня(true) и абсолютные(false)', default=True)
        # True: url_data/yauid_data are day offsets from the daily table date; False: used as table names as is.
        format_other_table = sdk2.parameters.Bool('Даты дневных таблиц в формате от результата(true) и абсолютные(false)', default=True)

        log_dir = sdk2.parameters.String('log dir', default_value='//logs/sovetnik-log/1d')
        log_from = sdk2.parameters.String('log от ', default_value="-7")
        log_to = sdk2.parameters.String('log до ', default_value="-1")

        redir_dir = sdk2.parameters.String('redir dir', default_value='//logs/sovetnik-redir-log/1d')
        buy_dir = sdk2.parameters.String('buy dir', default_value='//logs/sovetnik-buy-log/1d')
        view_search_table = sdk2.parameters.String('view_search таблица', default_value='//home/geosearch/sovetnik/stats/search_view/search_view')
        # Label fixed: it used to be a copy of the view_search label.
        host_cat_table = sdk2.parameters.String('host_category таблица', default_value='//home/geosearch/sovetnik/stats/host_category/host_category')
        bottom_category_table = sdk2.parameters.String('bottom_category таблица', default_value='//home/geosearch/sovetnik/stats/category/category')

        with sdk2.parameters.Group("yandexuid parameters"):
            yauid_dir = sdk2.parameters.String('директория yandexuid', default_value='//home/geosearch/sovetnik/stats/yandexuid')
            yauid_data = sdk2.parameters.String('yandexuid дата', default_value="-1")

        with sdk2.parameters.Group("url parameters"):
            url_dir = sdk2.parameters.String('директория url', default_value='//home/geosearch/sovetnik/stats/url')
            url_data = sdk2.parameters.String('url дата', default_value="-1")

        # Number of host levels (1..N) expanded into the daily query.
        count_host_level = sdk2.parameters.Integer('Число уровней хоста', default_value=5)

        YQL_token_owner = sdk2.parameters.String('владелец токена YQL в vault', default_value='gav1995')
        # Label fixed: it used to duplicate the "owner" label above.
        YQL_token_name = sdk2.parameters.String('имя токена YQL в vault', default_value='YQL_token')
        # Previously hard-coded inside `init`; defaults keep the old behaviour.
        YT_token_owner = sdk2.parameters.String('владелец токена YT в vault', default_value='gav1995')
        YT_token_name = sdk2.parameters.String('имя токена YT в vault', default_value='YT_token')

    class Requirements(sdk2.Task.Requirements):
        environments = [
            PipEnvironment('yandex-yt'),
            PipEnvironment('yql')
            ]

    def on_execute(self):
        """Task entry point: prepare state, then run the daily and final batches in order."""
        self.init()
        self.prepare()
        self.create_requests()
        # Fetch the secret once and reuse it for both batches.
        yql_token = sdk2.Vault.data(self.Parameters.YQL_token_owner, self.Parameters.YQL_token_name)
        logging.info("first batch started")
        run_batch(self.first_batch, yql_token, 'hahn')
        logging.info("first batch finished successful")
        logging.info("second batch started")
        run_batch(self.second_batch, yql_token, 'hahn')
        logging.info("second batch finished successful")

    def create_daily_request(self, table):
        """
            Render the YQL query that builds one daily table.

            :param table: daily table name in ``YYYY-MM-DD`` form; it is used both
                          as the log table name and as the result table name.
            :return: ``queries.daily_query`` with all placeholders substituted.
            :raises ValueError: if ``table`` is not a ``YYYY-MM-DD`` date.
        """
        table_date = datetime.strptime(table, '%Y-%m-%d')

        if self.Parameters.format_other_table:
            # Parameters hold day offsets relative to the daily table date.
            url_table = (table_date + timedelta(int(self.Parameters.url_data))).date().isoformat()
            yauid_table = (table_date + timedelta(int(self.Parameters.yauid_data))).date().isoformat()
        else:
            # Parameters hold table names and are passed through untouched.
            url_table = self.Parameters.url_data
            yauid_table = self.Parameters.yauid_data

        # Host levels are numbered from 1.
        levels = [level + 1 for level in range(self.Parameters.count_host_level)]

        host_plus_select = ''.join(
            queries.host_plus_select.format(level=level) for level in levels
        ).rstrip(',\n')  # drop the separator left after the last fragment
        logging.info('host_plus_select = \n%s', host_plus_select)

        host_plus_from = ''.join(
            queries.host_plus_from.format(
                level=level,
                url_dir=self.Parameters.url_dir,
                url_table=url_table,
            )
            for level in levels
        )
        logging.info('host_plus_from = \n%s', host_plus_from)

        return queries.daily_query.format(
            result_dir=self.Parameters.result_dir,
            result_table=table,
            host_plus_select=host_plus_select,
            log_dir=self.Parameters.log_dir,
            log_table=table,
            redir_dir=self.Parameters.redir_dir,
            buy_dir=self.Parameters.buy_dir,
            view_search_table=self.Parameters.view_search_table,
            host_cat_table=self.Parameters.host_cat_table,
            bottom_table=self.Parameters.bottom_category_table,
            yauid_dir=self.Parameters.yauid_dir,
            yauid_table=yauid_table,
            url_dir=self.Parameters.url_dir,
            url_table=url_table,
            host_plus_from=host_plus_from,
            part_show=self.Parameters.part_show,
            part_click=self.Parameters.part_click,
            part_cart=self.Parameters.part_cart,
        )

    def init(self):
        """
            Create the YQL client, configure YT, list the source and result
            directories and resolve the ``log_from`` / ``log_to`` dates.

            Sets ``yql_client``, ``yt_log_tables``, ``yt_redir_tables``,
            ``yt_buy_tables``, ``daily_tables``, ``final_tables``, ``log_from``,
            ``log_to`` and empty ``first_batch`` / ``second_batch`` lists.
        """
        # Imported locally: these packages are declared as PipEnvironment
        # requirements, so presumably they are not importable at module load time.
        from yql.api.v1.client import YqlClient
        import yt.wrapper as yt

        self.yql_client = YqlClient(
            db='Hahn',
            # Use the task parameters instead of a hard-coded vault entry.
            token=sdk2.Vault.data(self.Parameters.YQL_token_owner, self.Parameters.YQL_token_name),
        )
        yt.config["proxy"]["url"] = "hahn"
        yt.config["token"] = sdk2.Vault.data(self.Parameters.YT_token_owner, self.Parameters.YT_token_name)

        def list_names(path):
            # The listing is requested as JSON and decoded into a set of names.
            return set(json.loads(yt.list(path, format='json')))

        # The three log listings are not read elsewhere in this class; they are
        # kept as attributes in case other code relies on them.
        self.yt_log_tables = list_names(self.Parameters.log_dir)
        self.yt_redir_tables = list_names(self.Parameters.redir_dir)
        self.yt_buy_tables = list_names(self.Parameters.buy_dir)
        self.daily_tables = list_names(self.Parameters.result_dir+'/daily')
        self.final_tables = list_names(self.Parameters.result_dir+'/final')

        if self.Parameters.format_result_table:
            # Take "now" once so both bounds are relative to the same day even
            # if the task happens to run across midnight.
            now = datetime.now()
            self.log_from = (now + timedelta(int(self.Parameters.log_from))).date().isoformat()
            self.log_to = (now + timedelta(int(self.Parameters.log_to))).date().isoformat()
        else:
            self.log_from = self.Parameters.log_from
            self.log_to = self.Parameters.log_to
        self.first_batch = []
        self.second_batch = []

    @staticmethod
    def _parse_table_date(table):
        """Return the date encoded in a ``YYYY-MM-DD`` table name, or None if it is not one."""
        try:
            return datetime.strptime(table, '%Y-%m-%d').date()
        except (TypeError, ValueError):
            return None

    def _expired_tables(self, tables, ttl_days):
        """
            Select tables that may be dropped by TTL.

            A table is selected when its name is a date strictly older than
            ``today - ttl_days`` and that date is not a Monday (Monday tables
            are never removed by TTL). Names that are not dates are skipped
            with a warning instead of failing the task.

            :param tables: iterable of table names.
            :param ttl_days: retention period in days.
            :return: sorted list of table names to remove.
        """
        threshold = datetime.now().date() - timedelta(ttl_days)
        expired = []
        for table in tables:
            table_date = self._parse_table_date(table)
            if table_date is None:
                logging.warning('skip table with non-date name: %s', table)
                continue
            # isoweekday() == 1 is Monday.
            if table_date.isoweekday() != 1 and table_date < threshold:
                expired.append(table)
        expired.sort()
        return expired

    def prepare(self):
        """
            Decide which daily tables to build (``self.to_count``) and remove
            outdated tables from the ``daily`` and ``final`` directories.

            ``self.to_remove`` ends up holding the list passed to the last
            ``del_tables`` call (the ``final`` one).
        """
        from_date = datetime.strptime(self.log_from, '%Y-%m-%d')
        to_date = datetime.strptime(self.log_to, '%Y-%m-%d')
        # Inclusive range; empty when log_from is later than log_to.
        day_count = (to_date - from_date).days + 1
        tables = set((from_date + timedelta(day)).date().isoformat() for day in range(day_count))

        if self.Parameters.delete_daily:
            # Full rebuild: drop everything and recompute the whole range.
            self.to_remove = sorted(self.daily_tables)
            self.to_count = sorted(tables)
        else:
            # Build only missing days; drop only expired tables outside the range.
            self.to_count = sorted(tables - self.daily_tables)
            self.to_remove = self._expired_tables(self.daily_tables - tables, self.Parameters.ttl_daily)
        del_tables(self.Parameters.result_dir+'/daily', self.to_remove)
        logging.info('to_count = %s', self.to_count)

        if self.Parameters.delete_final:
            self.to_remove = sorted(self.final_tables)
        else:
            self.to_remove = self._expired_tables(self.final_tables, self.Parameters.ttl_final)
        del_tables(self.Parameters.result_dir+'/final', self.to_remove)

    def create_requests(self):
        """
            Fill ``first_batch`` with one ``(output path, query)`` pair per daily
            table from ``self.to_count`` and ``second_batch`` with the single
            pair for the final table named after ``self.log_to``.
        """
        daily_dir = self.Parameters.result_dir+'/daily/'
        for table in self.to_count:
            self.first_batch.append((daily_dir+table, self.create_daily_request(table)))

        final_query = queries.final_query.format(
            result_dir=self.Parameters.result_dir,
            result_table=self.log_to,
            table_from=self.log_from,
            table_to=self.log_to,
        )
        self.second_batch.append((self.Parameters.result_dir+'/final/'+self.log_to, final_query))
