# -*- coding: utf-8 -*-

from datetime import datetime, timedelta, date
import json
import logging
import requests
import time
from threading import Thread

import sandbox.sdk2 as sdk2
from sandbox.common.errors import TaskError, TaskFailure
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.common.yql import run_batch
from sandbox.projects.Sovetnik.Filter.YandexuidFactors import queries


def get_goals(res_tables, sources, filter, limit=1, recount_flag=False):
    """Decide which result tables to drop, keep in the window and (re)build.

    Args:
        res_tables: set of names of result tables that already exist (set
            operators are applied to it directly).
        sources: non-empty sequence of iterables of table names; only names
            present in every one of them are eligible for computation.
        filter: callable taking a table name; a truthy result protects that
            table from removal.
        limit: size of the window, taken as the ``[-limit:]`` slice of the
            sorted candidate names.
        recount_flag: when true, the whole window is scheduled for computation
            and every existing result table is nominated for removal.

    Returns:
        Tuple ``(to_remove, union, to_count)``: removal candidates that the
        filter did not protect, the names forming the window, and the names
        that have to be computed. All three are sorted lists.
    """
    # Names available in every source listing.
    common = set(sources[0])
    for candidate in sources:
        common.intersection_update(candidate)

    logging.info('res_tables = ' + str(sorted(res_tables)))

    if recount_flag:
        logging.info("Пересчет таблиц")
        to_count = sorted(common)[-limit:]
        to_remove = sorted(res_tables)
        # In recount mode the window is exactly what gets recomputed.
        union = to_count
    else:
        union = sorted(common | res_tables)[-limit:]
        logging.info('union = '+str(union))
        window = set(union)
        to_count = sorted(window - res_tables)
        to_remove = sorted(res_tables - window)

    logging.info('to_count = '+str(to_count))
    logging.info('to_remove before filter =' + str(to_remove))
    unprotected = []
    for name in to_remove:
        if not filter(name):
            unprotected.append(name)
    logging.info('to_remove after filter = ' + str(unprotected))
    return unprotected, union, to_count


def del_tables(yt_dir, tables):
    """Remove the named tables from a YT directory via the "hahn" proxy.

    Args:
        yt_dir: YT directory path that contains the tables.
        tables: iterable of table names located directly under *yt_dir*.
    """
    # Imported lazily: the module is only used inside this function.
    import yt.wrapper as yt
    yt.config["proxy"]["url"] = "hahn"
    yt.config["token"] = sdk2.Vault.data('gav1995', 'YT_token')
    for target in ("{}/{}".format(yt_dir, name) for name in tables):
        logging.info('removing ' + target)
        yt.remove(target)


class SovetnikYandexuidFactorsUpdater(sdk2.Task):
    """
        Обновление статистическиз факторов Yandexuid для советника
    """
    class Parameters(sdk2.Parameters):
        # Task input parameters, grouped by purpose. The first positional
        # argument of every parameter is its human-readable description.

        # Settings shared by all statistics kinds.
        with sdk2.parameters.Group("Общее"):
            big_stat_days = sdk2.parameters.Integer('большая статистика дней', default_value=90)
            small_stat_days = sdk2.parameters.Integer('маленькая статистика дней', default_value=7)
            to_SaaS_dir = sdk2.parameters.String('директория обновления SaaS', default_value='//home/geosearch/sovetnik/SaaS')
            ferryman_url = sdk2.parameters.Url('url Ферримена', default_value='http://sovetnik-filter.ferryman.n.yandex-team.ru')

        # YT directories with the source logs.
        with sdk2.parameters.Group("Логи"):
            yt_log = sdk2.parameters.String('директория логов в YT', default_value='//logs/sovetnik-log/1d')
            yt_redir = sdk2.parameters.String('директория перенаправлений в YT', default_value='//logs/sovetnik-redir-log/1d')
            yt_event = sdk2.parameters.String('директория событий', default_value='//logs/sovetnik-client-event-log/1d')
            # The description used to be a copy of the yt_event one.
            yt_buy = sdk2.parameters.String('директория покупок', default_value='//logs/sovetnik-buy-log/1d')

        # Per-yandexuid statistics.
        with sdk2.parameters.Group("yandexuid"):
            yauid_yt_res = sdk2.parameters.String('директория статистики', default_value='//home/geosearch/sovetnik/stats/yandexuid')
            yauid_recount_flag = sdk2.parameters.Bool('пересчитать таблицы', default_value=False)
            yauid_to_SaaS_flag = sdk2.parameters.Bool('обновить SaaS yandexuid', default_value=True)
            yauid_90d_ttl = sdk2.parameters.Integer('Время хранения 90 дневных таблиц yauid', default_value=14)
            yauid_7d_ttl = sdk2.parameters.Integer('Время хранения 7 дневных таблиц yauid', default_value=14)
            yauid_90d_SaaS_fields = sdk2.parameters.String(
                'Поля в SaaS 90 дневных таблиц yauid',
                default_value=' d7.shows ?? 0 as d7_shows, d7.clicks ?? 0 as d7_clicks, d7.popup ?? 0 as d7_popup, d7.cart ?? 0 as d7_cart,'
                ' d90.shows as shows, d90.clicks as clicks, d90.ctr as ctr, d90.popup as popup, d90.cart as cart, '
                'd90.pricebar_close as pricebar_close, d90.mobile_popup_hidden as mobile_popup_hidden'
            )

        # Per-url statistics.
        with sdk2.parameters.Group("url"):
            url_yt_res = sdk2.parameters.String('директория статистики', default_value='//home/geosearch/sovetnik/stats/url')
            url_recount_flag = sdk2.parameters.Bool('пересчитать таблицы', default_value=False)
            url_to_SaaS_flag = sdk2.parameters.Bool('обновить SaaS url', default_value=True)
            url_90d_ttl = sdk2.parameters.Integer('Время хранения 90 дневных урловых таблиц', default_value=14)
            url_7d_ttl = sdk2.parameters.Integer('Время хранения 7 дневных урловых таблиц', default_value=14)
            url_90d_SaaS_fields = sdk2.parameters.String('Поля в SaaS 90 дневных урловых таблиц', default_value='shows, clicks, ctr, cart')

        # Per search-view-type statistics.
        with sdk2.parameters.Group("search_view"):
            search_view_yt_res = sdk2.parameters.String('директория статистики', default_value='//home/geosearch/sovetnik/stats/search_view')
            search_view_recount_flag = sdk2.parameters.Bool('пересчитать таблицы', default_value=False)
            search_view_to_SaaS_flag = sdk2.parameters.Bool('обновить SaaS search_view', default_value=True)
            search_view_90d_ttl = sdk2.parameters.Integer('Время хранения 90 дневных таблиц search view types', default_value=14)
            search_view_90d_SaaS_fields = sdk2.parameters.String('Поля в SaaS 90 дневных таблиц search view types', default_value='shows, clicks, ctr')

        # Per-category statistics.
        with sdk2.parameters.Group("category"):
            cat_yt_res = sdk2.parameters.String('директория статистики', default_value='//home/geosearch/sovetnik/stats/category')
            cat_recount_flag = sdk2.parameters.Bool('пересчитать таблицы', default_value=False)
            cat_to_SaaS_flag = sdk2.parameters.Bool('обновить SaaS категорий', default_value=True)
            cat_90d_ttl = sdk2.parameters.Integer('Время хранения 90 дневных категорийных таблиц', default_value=14)
            cat_90d_SaaS_fields = sdk2.parameters.String('Поля в SaaS 90 дневных категорийных таблиц', default_value='shows, clicks, ctr')

        # Per host + category statistics.
        with sdk2.parameters.Group("host_cat"):
            host_cat_yt_res = sdk2.parameters.String('директория статистики', default_value='//home/geosearch/sovetnik/stats/host_category')
            host_cat_recount_flag = sdk2.parameters.Bool('пересчитать таблицы', default_value=False)
            host_cat_to_SaaS_flag = sdk2.parameters.Bool('обновить SaaS хост + категорий', default_value=True)
            host_cat_90d_ttl = sdk2.parameters.Integer('Время хранения 90 дневных таблиц host category', default_value=14)
            host_cat_90d_SaaS_fields = sdk2.parameters.String('Поля в SaaS 90 дневных таблиц host category', default_value='shows, clicks, ctr, popup, cart, mobile_popup_hidden')

        # Vault coordinates of the YQL token passed to run_batch.
        YQL_token_owner = sdk2.parameters.String('владелец токена YQL в vault', default_value='gav1995')
        # The description used to be a copy of the owner one; this parameter is the secret's name.
        YQL_token_name = sdk2.parameters.String('имя токена YQL в vault', default_value='YQL_token')

    class Requirements(sdk2.Task.Requirements):
        # Pip packages needed by the task: `yandex-yt` backs the
        # `yt.wrapper` imports in this file, `yql` is requested alongside it.
        environments = [
            PipEnvironment('yandex-yt'),
            PipEnvironment('yql'),
        ]

    def on_execute(self):
        """Run the whole update: plan, build queries, run them, upload to SaaS.

        The three query batches are executed strictly one after another via
        ``run_batch`` on the 'hahn' cluster; the YQL token is read from the
        vault before each batch.
        """
        self.init()
        self.prepare()
        self.create_requests()

        stages = (
            ("first batch started", self.first_batch, "first batch finished successful"),
            ("second batch started", self.second_batch, "second batch finished successful"),
            ("to SaaS batch started", self.toSaaS_batch, "to SaaS batch finished successful"),
        )
        for started_msg, batch, finished_msg in stages:
            logging.info(started_msg)
            yql_token = sdk2.Vault.data(self.Parameters.YQL_token_owner, self.Parameters.YQL_token_name)
            run_batch(batch, yql_token, 'hahn')
            logging.info(finished_msg)

        logging.info("updateSaaS")
        self.updateSaaS()
        logging.info("updateSaaS finished successful")

    def init(self):
        """List the source-log and result directories in YT.

        Every listing is stored on the task instance as a set of node names
        (``self.yt_*_tables`` for the logs, ``self.<kind>_<period>_tables``
        for the result directories).
        """
        import yt.wrapper as yt
        yt.config["proxy"]["url"] = "hahn"
        yt.config["token"] = sdk2.Vault.data('gav1995', 'YT_token')

        def listing(path):
            # yt.list is asked for JSON output, which is decoded into a set.
            return set(json.loads(yt.list(path, format='json')))

        params = self.Parameters

        # Source logs.
        self.yt_log_tables = listing(params.yt_log)
        self.yt_redir_tables = listing(params.yt_redir)
        self.yt_event_tables = listing(params.yt_event)
        self.yt_buy_tables = listing(params.yt_buy)

        # Already computed yandexuid statistics.
        self.yauid_7days_tables = listing(params.yauid_yt_res+'/7days')
        self.yauid_daily_tables = listing(params.yauid_yt_res+'/daily')
        self.yauid_90days_tables = listing(params.yauid_yt_res+'/90days')

        # Already computed url statistics.
        self.url_daily_tables = listing(params.url_yt_res+'/daily')
        self.url_7days_tables = listing(params.url_yt_res+'/7days')
        self.url_90days_tables = listing(params.url_yt_res+'/90days')

        # Kinds that only keep daily tables.
        self.search_view_daily_tables = listing(params.search_view_yt_res+'/daily')
        self.cat_daily_tables = listing(params.cat_yt_res+'/daily')
        self.host_cat_daily_tables = listing(params.host_cat_yt_res+'/daily')

    def prepare(self):
        """Plan the work for every statistics kind and delete outdated tables.

        For each kind the daily tables are planned first (window of
        ``big_stat_days`` tables, honouring the kind's recount flag). For
        yandexuid and url the 7-day and 90-day aggregates are then planned
        against the daily window. Results are stored on the instance as
        ``self.<kind>_daily_union``, ``self.<kind>_daily_to_count`` and
        ``self.<kind>_<period>_to_count``.

        Each log header is emitted before that section's own planning and
        deletion, so the removal messages are attributed to the right section.
        """
        params = self.Parameters
        # yandexuid and url daily tables are planned against all four logs;
        # search_view, category and host_category only against the first two.
        all_logs = [
            self.yt_log_tables,
            self.yt_redir_tables,
            self.yt_event_tables,
            self.yt_buy_tables,
        ]
        show_logs = [
            self.yt_log_tables,
            self.yt_redir_tables,
        ]

        def is_monday(day):
            return day.isoweekday() == 1

        def is_month_start(day):
            return day.day == 1

        self.yauid_daily_union, self.yauid_daily_to_count = self._prepare_daily(
            "yauid_daily", params.yauid_yt_res, self.yauid_daily_tables, all_logs, params.yauid_recount_flag)
        self.yauid_7days_to_count = self._prepare_period(
            "yauid_7days", params.yauid_yt_res+'/7days', self.yauid_7days_tables,
            self.yauid_daily_union, is_monday, params.yauid_7d_ttl)
        self.yauid_90days_to_count = self._prepare_period(
            "yauid_90days", params.yauid_yt_res+'/90days', self.yauid_90days_tables,
            self.yauid_daily_union, is_month_start, params.yauid_90d_ttl)

        self.url_daily_union, self.url_daily_to_count = self._prepare_daily(
            "url_daily", params.url_yt_res, self.url_daily_tables, all_logs, params.url_recount_flag)
        self.url_7days_to_count = self._prepare_period(
            "url_7days", params.url_yt_res+'/7days', self.url_7days_tables,
            self.url_daily_union, is_monday, params.url_7d_ttl)
        self.url_90days_to_count = self._prepare_period(
            "url_90days", params.url_yt_res+'/90days', self.url_90days_tables,
            self.url_daily_union, is_month_start, params.url_90d_ttl)

        self.search_view_daily_union, self.search_view_daily_to_count = self._prepare_daily(
            "search_view_daily", params.search_view_yt_res, self.search_view_daily_tables,
            show_logs, params.search_view_recount_flag)

        self.cat_daily_union, self.cat_daily_to_count = self._prepare_daily(
            "cat_daily", params.cat_yt_res, self.cat_daily_tables, show_logs, params.cat_recount_flag)

        self.host_cat_daily_union, self.host_cat_daily_to_count = self._prepare_daily(
            "host_cat_daily", params.host_cat_yt_res, self.host_cat_daily_tables,
            show_logs, params.host_cat_recount_flag)

    def _prepare_daily(self, label, res_dir, existing, sources, recount_flag):
        """Plan the daily tables of one kind and delete those outside the window.

        Returns ``(union, to_count)`` as produced by ``get_goals``.
        """
        logging.info(label)
        to_remove, union, to_count = get_goals(
            existing,
            sources,
            lambda x: False,  # daily tables outside the window are never protected
            self.Parameters.big_stat_days,
            recount_flag
        )
        del_tables(res_dir+'/daily', to_remove)
        return union, to_count

    def _prepare_period(self, label, yt_dir, existing, daily_union, is_anchor_day, ttl_days):
        """Plan the aggregate tables in *yt_dir* and delete the expired ones.

        A table is kept when its date satisfies *is_anchor_day* or is not
        older than *ttl_days* days. Returns the names still to be computed.
        """
        logging.info(label)
        oldest_kept = datetime.now().date() - timedelta(ttl_days)

        def keep(name):
            # Table names are dates in YYYY-MM-DD form.
            day = datetime.strptime(name, '%Y-%m-%d').date()
            return is_anchor_day(day) or day >= oldest_kept

        to_remove, _, to_count = get_goals(existing, [daily_union], keep)
        del_tables(yt_dir, to_remove)
        return to_count

    def create_requests(self):
        """Build the three query batches from the plan made by ``prepare``.

        ``self.first_batch`` receives the per-day queries,
        ``self.second_batch`` the aggregate (7days / 90days / rolling)
        queries and ``self.toSaaS_batch`` the export queries. Every entry is
        a ``(label, query)`` tuple where the label is a YT-path-like string;
        how ``run_batch`` uses it is not visible in this file.

        Fixes compared to the previous version:
          * the yandexuid export no longer raises IndexError when only
            7-day tables (and no 90-day tables) are pending;
          * the category aggregate and export entries are labelled with the
            category paths instead of the copy-pasted search_view and
            url_90days ones.
        """
        params = self.Parameters
        self.first_batch = []
        self.second_batch = []
        self.toSaaS_batch = []

        def add_daily(template, res_dir, tables, with_events):
            """Queue one per-day query for each pending daily table."""
            for table in tables:
                kwargs = dict(
                    table=table,
                    result_dir=res_dir,
                    log_dir=params.yt_log,
                    redir_dir=params.yt_redir,
                )
                if with_events:
                    kwargs.update(event_dir=params.yt_event, buy_dir=params.yt_buy)
                self.first_batch.append((res_dir+'/daily/'+table, template.format(**kwargs)))

        def add_period(template, res_dir, tables, days, period):
            """Queue an aggregate over the *days* days ending at each pending table."""
            for table in tables:
                last = datetime.strptime(table, '%Y-%m-%d').date()
                first = last - timedelta(days-1)
                query = template.format(
                    result_dir=res_dir,
                    first_table=first,
                    last_table=last,
                    period=period
                )
                self.second_batch.append((res_dir+'/'+period+'/'+table, query))

        def add_rolling(stat_template, saas_template, res_dir, name, fields, pending):
            """Queue the rolling aggregate and its export when daily tables are pending."""
            if not pending:
                return
            today = date.today()
            stat_query = stat_template.format(
                result_dir=res_dir,
                last_table=today - timedelta(1),
                first_table=today - timedelta(params.big_stat_days)
            )
            self.second_batch.append((res_dir+'/'+name, stat_query))
            saas_query = saas_template.format(
                to_SaaS_dir=params.to_SaaS_dir,
                fields=fields,
                stat_dir=res_dir
            )
            self.toSaaS_batch.append((params.to_SaaS_dir + '/' + name, saas_query))

        # ---- yandexuid ----
        add_daily(queries.yauid_day_query, params.yauid_yt_res, self.yauid_daily_to_count, True)
        add_period(queries.yauid_stat_query, params.yauid_yt_res,
                   self.yauid_7days_to_count, params.small_stat_days, '7days')
        add_period(queries.yauid_stat_query, params.yauid_yt_res,
                   self.yauid_90days_to_count, params.big_stat_days, '90days')

        # Pick the non-empty list first and index afterwards; indexing an
        # empty 90-day list used to raise IndexError.
        yauid_pending = self.yauid_90days_to_count or self.yauid_7days_to_count
        if yauid_pending:
            query = queries.yauid_to_SaaS.format(
                to_SaaS_dir=params.to_SaaS_dir,
                fields=params.yauid_90d_SaaS_fields,
                stat_dir=params.yauid_yt_res,
                table=yauid_pending[-1]
            )
            self.toSaaS_batch.append((params.to_SaaS_dir + '/yauid_90days', query))

        # ---- url ----
        add_daily(queries.url_day_query, params.url_yt_res, self.url_daily_to_count, True)
        add_period(queries.url_stat_query, params.url_yt_res,
                   self.url_7days_to_count, params.small_stat_days, '7days')
        add_period(queries.url_stat_query, params.url_yt_res,
                   self.url_90days_to_count, params.big_stat_days, '90days')

        if self.url_90days_to_count:
            query = queries.url_to_SaaS.format(
                to_SaaS_dir=params.to_SaaS_dir,
                period='90days',
                fields=params.url_90d_SaaS_fields,
                stat_dir=params.url_yt_res,
                table=self.url_90days_to_count[-1]
            )
            self.toSaaS_batch.append((params.to_SaaS_dir + '/url_90days', query))

        # ---- search_view ----
        add_daily(queries.search_view_day_query, params.search_view_yt_res,
                  self.search_view_daily_to_count, False)
        add_rolling(queries.search_view_stat_query, queries.search_view_to_SaaS,
                    params.search_view_yt_res, 'search_view',
                    params.search_view_90d_SaaS_fields, self.search_view_daily_to_count)

        # ---- category ----
        add_daily(queries.cat_day_query, params.cat_yt_res, self.cat_daily_to_count, False)
        # Labels were previously copy-pasted from search_view / url_90days;
        # '/category' matches the table name uploaded by updateSaaS.
        add_rolling(queries.cat_stat_query, queries.cat_to_SaaS,
                    params.cat_yt_res, 'category',
                    params.cat_90d_SaaS_fields, self.cat_daily_to_count)

        # ---- host + category ----
        add_daily(queries.host_cat_day_query, params.host_cat_yt_res,
                  self.host_cat_daily_to_count, True)
        add_rolling(queries.host_cat_stat_query, queries.host_cat_to_SaaS,
                    params.host_cat_yt_res, 'host_category',
                    params.host_cat_90d_SaaS_fields, self.host_cat_daily_to_count)

    def wait_ferryman_batch(self, batch):
        """Ask Ferryman once for the status of *batch*.

        Returns:
            True while the batch is still in progress (status 'queue',
            'processing', 'transfer' or 'final'), False once it is
            'searchable' with no invalid input reported.

        Raises:
            TaskError: the response body is not valid JSON.
            TaskFailure: Ferryman reported 'error', reported invalid input
                for a searchable batch, or returned a missing/unknown status.
        """
        status_url = '{}/get-batch-status?batch={}'.format(self.Parameters.ferryman_url, batch)
        response = requests.get(status_url)
        logging.info('for {} response = {}'.format(status_url, response.text))
        try:
            response_json = response.json()
        except ValueError:
            # JSON decoding errors derive from ValueError; a bare except
            # would also have swallowed KeyboardInterrupt/SystemExit.
            raise TaskError('Get batch status is failed. response = ' + str(response))

        # A missing "status" key or a non-object payload is reported as an
        # unparsable answer instead of surfacing as KeyError/TypeError.
        status = response_json.get('status') if isinstance(response_json, dict) else None

        if status in ('queue', 'processing', 'transfer', 'final'):
            return True
        if status == 'error':
            raise TaskFailure('Error occurred in batch processing')
        if status == 'searchable':
            if response_json.get('invalid_input', []):
                raise TaskFailure('Error occurred in batch processing')
            return False
        raise TaskFailure('Can\'t parse answer')

    def save_to_saas(self, table, namespace, error, max_attempts=30, retry_delay=10):
        """Submit one YT table to Ferryman and wait until it is searchable.

        Used as a thread target by ``updateSaaS``, so nothing is allowed to
        escape: every failure is appended to *error* as a string for the
        caller to inspect after joining the thread.

        Args:
            table: YT path of the table to upload.
            namespace: Ferryman namespace for the table.
            error: shared list collecting error messages.
            max_attempts: how many times the add-table request is sent before
                giving up (previously it was retried forever).
            retry_delay: seconds to sleep between add-table attempts
                (previously there was no pause between retries).
        """
        try:
            timestamp = int(time.time() * 10 ** 6)
            query = 'namespace={namespace}&path={path}&timestamp={timestamp}&cluster=hahn'.format(namespace=namespace, path=table, timestamp=timestamp)
            add_table_url = '{}/add-table'.format(self.Parameters.ferryman_url)
            logging.info('{} to SaaS'.format(table))
            response = requests.get(add_table_url, query)
            logging.info(response.url)

            # 202 is the only status treated as "accepted"; anything else is
            # retried a bounded number of times with a pause in between.
            attempt = 1
            while response.status_code != 202:
                logging.info('status_code = ' + str(response.status_code))
                if attempt >= max_attempts:
                    raise TaskError('Request to Ferryman is failed with code ' + str(response.status_code))
                attempt += 1
                time.sleep(retry_delay)
                response = requests.get(add_table_url, query)
            logging.info(response.text)

            try:
                batch = response.json()['batch']
            except (ValueError, KeyError, TypeError):
                raise TaskError('Request to Ferryman is failed with code ' + str(response.status_code))

            # Poll every 10 seconds; cooking_time counts the polls.
            cooking_time = 0
            while self.wait_ferryman_batch(batch):
                cooking_time += 1
                time.sleep(10)
            # NOTE(review): `time` is not declared in the Parameters class
            # visible in this file - confirm this assignment is supported.
            self.Parameters.time = cooking_time
            logging.info("ferryman for table {} was cooked!".format(table))
        except TaskError as er:
            logging.info('error !!!{}'.format(str(er)))
            error.append(str(er))
        except Exception as er:
            # Thread boundary: an exception escaping here would only kill this
            # worker thread and the task would still report success.
            logging.exception('unexpected error while uploading {} to SaaS'.format(table))
            error.append('{}: {!r}'.format(table, er))

    def updateSaaS(self):
        """Upload every enabled SaaS table in parallel, one thread per table.

        Raises:
            TaskError: at least one upload reported an error; the message is
                the newline-joined list of the collected errors.
        """
        params = self.Parameters
        # (enabled flag, table name under to_SaaS_dir, namespace number)
        switches = (
            (params.yauid_to_SaaS_flag, '/yandexuid', 3),
            (params.url_to_SaaS_flag, '/url_90days', 4),
            (params.search_view_to_SaaS_flag, '/search_view', 0),
            (params.cat_to_SaaS_flag, '/category', 2),
            (params.host_cat_to_SaaS_flag, '/host_category', 5),
        )
        uploads = []
        for enabled, suffix, namespace in switches:
            if enabled:
                uploads.append((params.to_SaaS_dir+suffix, namespace))

        failures = []
        workers = []
        for table, namespace in uploads:
            logging.info('{},{}'.format(table, namespace))
            worker = Thread(target=self.save_to_saas, args=(table, namespace, failures))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()

        if failures:
            raise TaskError('\n'.join(failures))
